ARIMA

--- Import libraries for ARIMA----¶

In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from pmdarima import auto_arima
import warnings
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from pmdarima import auto_arima
import warnings
warnings.filterwarnings('ignore')

# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

print(f"Original data length: {len(data)}")

# --- Step 1: Train/Test Split ---
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:]

print(f"Train size: {len(train_data)}")
print(f"Validation size: {len(val_data)}")
print(f"Test size: {len(test_data)}")

# --- Step 2: Stationarity Check ---
def check_stationarity(series):
    """Check if series is stationary using ADF test"""
    result = adfuller(series)
    print(f"ADF Statistic: {result[0]:.4f}")
    print(f"p-value: {result[1]:.4f}")
    print("Critical Values:")
    for key, value in result[4].items():
        print(f"   {key}: {value:.4f}")
    return result[1] < 0.05  # Return True if stationary

print("\nChecking stationarity of training data...")
is_stationary = check_stationarity(train_data)
print(f"Data is {'stationary' if is_stationary else 'non-stationary'}")

# --- Step 3: Auto ARIMA for Optimal Parameters ---
print("\n" + "="*50)
print("AUTO ARIMA PARAMETER SEARCH")
print("="*50)

# Auto ARIMA for optimal parameters
auto_arima_model = auto_arima(
    train_data,
    seasonal=False,  # Non-seasonal ARIMA
    stepwise=True,
    suppress_warnings=True,
    error_action='ignore',
    trace=True,
    information_criterion='aic'
)

print(f"Optimal ARIMA order: {auto_arima_model.order}")

# --- Step 4: Build and Train ARIMA Model ---
print("\n" + "="*50)
print("ARIMA MODEL TRAINING")
print("="*50)

arima_model = ARIMA(train_data, order=auto_arima_model.order)
arima_result = arima_model.fit()

print("\nARIMA MODEL SUMMARY")
print("="*50)
print(arima_result.summary())

# ARIMA forecasting
arima_forecast = arima_result.forecast(steps=len(val_data))
arima_val_pred = arima_forecast

# --- Step 5: Auto SARIMA for Optimal Parameters ---
print("\n" + "="*50)
print("AUTO SARIMA PARAMETER SEARCH")
print("="*50)

# Auto SARIMA for optimal parameters (with seasonal component)
auto_sarima_model = auto_arima(
    train_data,
    seasonal=True,
    m=26,  # Weekly data with yearly seasonality
    stepwise=True,
    suppress_warnings=True,
    error_action='ignore',
    trace=True,
    information_criterion='aic'
)

print(f"Optimal SARIMA order: {auto_sarima_model.order}")
print(f"Optimal Seasonal order: {auto_sarima_model.seasonal_order}")

# --- Step 6: Build and Train SARIMA Model ---
print("\n" + "="*50)
print("SARIMA MODEL TRAINING")
print("="*50)

sarima_model = SARIMAX(
    train_data,
    order=auto_sarima_model.order,
    seasonal_order=auto_sarima_model.seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False
)

sarima_result = sarima_model.fit(disp=False)

print("\nSARIMA MODEL SUMMARY")
print("="*50)
print(sarima_result.summary())

# SARIMA forecasting
sarima_forecast = sarima_result.forecast(steps=len(val_data))
sarima_val_pred = sarima_forecast

# --- Step 7: Model Evaluation on Validation Set ---
def evaluate_model(actual, predicted, model_name):
    """Comprehensive model evaluation"""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    mape = mean_absolute_percentage_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    predicted_diff = np.sign(np.diff(predicted))
    da = (np.sum(actual_diff == predicted_diff) / len(actual_diff)) * 100
    
    print(f"\n{model_name} VALIDATION PERFORMANCE:")
    print("="*40)
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"R²: {r2:.4f}")
    print(f"Directional Accuracy: {da:.2f}%")
    
    return {
        'MSE': mse, 'RMSE': rmse, 'MAE': mae, 
        'MAPE': mape, 'R²': r2, 'DA': da
    }

# Evaluate both models on validation set
arima_metrics = evaluate_model(val_data, arima_val_pred, "ARIMA")
sarima_metrics = evaluate_model(val_data, sarima_val_pred, "SARIMA")

# --- Step 8: Final Model Selection and Test Forecasting ---
# Select the best model based on validation performance
if arima_metrics['RMSE'] < sarima_metrics['RMSE']:
    print("\nSELECTED BEST MODEL: ARIMA")
    best_model = arima_result
    best_model_name = "ARIMA"
    best_order = auto_arima_model.order
else:
    print("\nSELECTED BEST MODEL: SARIMA")
    best_model = sarima_result
    best_model_name = "SARIMA"
    best_order = f"{auto_sarima_model.order}{auto_sarima_model.seasonal_order}"

# Retrain best model on train + validation data
final_train_data = np.concatenate([train_data, val_data])

if best_model_name == "ARIMA":
    final_model = ARIMA(final_train_data, order=best_order)
    final_result = final_model.fit()
else:
    final_model = SARIMAX(
        final_train_data,
        order=auto_sarima_model.order,
        seasonal_order=auto_sarima_model.seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    final_result = final_model.fit(disp=False)

# Final forecasting on test set
test_forecast = final_result.forecast(steps=len(test_data))

# --- Step 9: Final Evaluation on Test Set ---
final_metrics = evaluate_model(test_data, test_forecast, f"FINAL {best_model_name}")

# --- Step 10: Visualization ---
# Create date indices for plotting
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:train_size+val_size+test_size]


# Plot 1: ARIMA Results
plt.figure(figsize=(12, 8))
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, arima_val_pred, label='ARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'ARIMA Model: Order {auto_arima_model.order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/arima_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: SARIMA Results
plt.figure(figsize=(12, 8))
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, sarima_val_pred, label='SARIMA Forecast', color='orange', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'SARIMA Model: Order {auto_sarima_model.order}{auto_sarima_model.seasonal_order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Final Test Results
plt.figure(figsize=(12, 8))
plt.plot(df.index[:train_size+val_size], final_train_data, label='Train+Validation', color='blue', alpha=0.7)
plt.plot(test_dates, test_data, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Final {best_model_name} Model: Test Performance')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 4: Separate Actual vs Predicted (Test)
plt.figure(figsize=(12, 8))
plt.plot(test_dates, test_data, label='Actual Test', color='blue', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates, 
                 test_forecast - final_metrics['RMSE'], 
                 test_forecast + final_metrics['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title(f'Actual vs Predicted - {best_model_name} (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 5: Model Comparison
plt.figure(figsize=(12, 8))
models = ['ARIMA', 'SARIMA']
rmse_values = [arima_metrics['RMSE'], sarima_metrics['RMSE']]
mae_values = [arima_metrics['MAE'], sarima_metrics['MAE']]

x = np.arange(len(models))
width = 0.35

plt.bar(x - width/2, rmse_values, width, label='RMSE', alpha=0.8)
plt.bar(x + width/2, mae_values, width, label='MAE', alpha=0.8)
plt.xlabel('Models')
plt.ylabel('Error Values')
plt.title('Model Comparison (Validation Set)')
plt.xticks(x, models)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result5.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 6: Residuals Analysis
residuals = test_data - test_forecast
plt.figure(figsize=(12, 8))
plt.plot(test_dates, residuals, color='purple', alpha=0.7)
plt.axhline(0, color='red', linestyle='--', linewidth=2)
plt.title(f'{best_model_name} Model Residuals (Test Set)')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.grid(True)
plt.xticks(rotation=45)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result6.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 11: Future Forecasting ---
print("\n" + "="*50)
print("FUTURE FORECASTING")
print("="*50)

# Forecast next 12 periods
future_steps = 12
future_forecast = final_result.forecast(steps=len(test_data) + future_steps)
future_forecast = future_forecast[-future_steps:]  # Get only future values

# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=future_steps, freq='W')

print(f"Future Forecast (Next {future_steps} weeks):")
print("-" * 40)
for i, (date, price) in enumerate(zip(future_dates, future_forecast), 1):
    print(f"Week {i:2d} ({date.strftime('%Y-%m-%d')}): {price:.2f} Rs./kg")

# Plot future forecast
plt.figure(figsize=(12, 6))
# Plot historical data
historical_dates = df.index[-100:]  # Last 100 points
historical_data = data[-100:]
plt.plot(historical_dates, historical_data, label='Historical Data', color='blue', linewidth=2)

# Plot future forecast
plt.plot(future_dates, future_forecast, label='Future Forecast', color='red', linestyle='--', linewidth=2)
plt.fill_between(future_dates, 
                 future_forecast - final_metrics['RMSE'], 
                 future_forecast + final_metrics['RMSE'], 
                 alpha=0.2, color='red', label='Uncertainty Band')

plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title(f'{best_model_name} Future Price Forecast (Next {future_steps} weeks)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result7.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 12: Model Diagnostics ---
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)

# Plot diagnostics for the best model
if best_model_name == "ARIMA":
    print("ARIMA Model Diagnostics:")
    # ARIMA diagnostics
    residuals = final_result.resid
    
    plt.figure(figsize=(12, 8))
    plt.plot(residuals)
    plt.title('ARIMA Model Residuals')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/esarima_result8.png", dpi=300, bbox_inches='tight')
    plt.show()   
    plt.figure(figsize=(12, 8))
    plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
    plt.title('Residual Distribution')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/esarima_result9.png", dpi=300, bbox_inches='tight')
    plt.show() 
    plt.figure(figsize=(12, 8))
    plt.acorr(residuals, maxlags=20)
    plt.title('Residual Autocorrelation')
    plt.grid(True)
    
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/esarima_result11.png", dpi=300, bbox_inches='tight')
    plt.show()
    
else:
    print("SARIMA Model Diagnostics:")
    # Plot SARIMA diagnostics
    final_result.plot_diagnostics(figsize=(12, 8))
    plt.suptitle('SARIMA Model Diagnostics', y=1.02)
    plt.tight_layout()
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/esarima_result12.png", dpi=300, bbox_inches='tight')
    plt.show()

# --- Final Summary ---
print("\n" + "="*60)
print("FINAL SUMMARY")
print("="*60)
print(f"Best Model: {best_model_name}")
print(f"Model Order: {best_order}")
print(f"Test RMSE: {final_metrics['RMSE']:.4f}")
print(f"Test MAE: {final_metrics['MAE']:.4f}")
print(f"Test MAPE: {final_metrics['MAPE']:.2f}%")
print(f"Test R²: {final_metrics['R²']:.4f}")
print(f"Directional Accuracy: {final_metrics['DA']:.2f}%")

print("\nKey Insights:")
print("- ARIMA models are simpler and faster to train")
print("- SARIMA models capture seasonal patterns better")
print("- The best model was selected based on validation performance")
print("- Future forecasts include uncertainty bands based on RMSE")
Original data length: 722
Train size: 505
Validation size: 108
Test size: 109

Checking stationarity of training data...
ADF Statistic: -1.3335
p-value: 0.6137
Critical Values:
   1%: -3.4439
   5%: -2.8675
   10%: -2.5699
Data is non-stationary

==================================================
AUTO ARIMA PARAMETER SEARCH
==================================================
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=6599.863, Time=0.60 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=6644.336, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=6630.207, Time=0.05 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=6627.971, Time=0.06 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=6642.340, Time=0.02 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=6604.065, Time=0.49 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=6604.049, Time=0.49 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=6598.494, Time=0.70 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=6597.054, Time=0.69 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=6614.705, Time=0.09 sec
 ARIMA(4,1,1)(0,0,0)[0] intercept   : AIC=6598.722, Time=0.72 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=6631.489, Time=0.06 sec
 ARIMA(4,1,0)(0,0,0)[0] intercept   : AIC=6613.001, Time=0.11 sec
 ARIMA(4,1,2)(0,0,0)[0] intercept   : AIC=6599.037, Time=1.09 sec
 ARIMA(3,1,1)(0,0,0)[0]             : AIC=6595.176, Time=0.33 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=6602.227, Time=0.27 sec
 ARIMA(3,1,0)(0,0,0)[0]             : AIC=6612.716, Time=0.06 sec
 ARIMA(4,1,1)(0,0,0)[0]             : AIC=6596.852, Time=0.40 sec
 ARIMA(3,1,2)(0,0,0)[0]             : AIC=6596.627, Time=0.42 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=6629.496, Time=0.03 sec
 ARIMA(2,1,2)(0,0,0)[0]             : AIC=6598.059, Time=0.31 sec
 ARIMA(4,1,0)(0,0,0)[0]             : AIC=6611.015, Time=0.07 sec
 ARIMA(4,1,2)(0,0,0)[0]             : AIC=6597.167, Time=0.58 sec

Best model:  ARIMA(3,1,1)(0,0,0)[0]          
Total fit time: 7.696 seconds
Optimal ARIMA order: (3, 1, 1)

==================================================
ARIMA MODEL TRAINING
==================================================

ARIMA MODEL SUMMARY
==================================================
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(3, 1, 1)   Log Likelihood               -3292.588
Date:                Wed, 05 Nov 2025   AIC                           6595.176
Time:                        22:53:13   BIC                           6616.288
Sample:                             0   HQIC                          6603.457
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.5462      0.032     17.318      0.000       0.484       0.608
ar.L2          0.0657      0.048      1.379      0.168      -0.028       0.159
ar.L3         -0.1467      0.039     -3.720      0.000      -0.224      -0.069
ma.L1         -0.7956      0.032    -24.714      0.000      -0.859      -0.733
sigma2      2.765e+04    483.915     57.144      0.000    2.67e+04    2.86e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):             43865.21
Prob(Q):                              0.95   Prob(JB):                         0.00
Heteroskedasticity (H):              19.12   Skew:                             0.63
Prob(H) (two-sided):                  0.00   Kurtosis:                        48.69
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

==================================================
AUTO SARIMA PARAMETER SEARCH
==================================================
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[26] intercept   : AIC=6598.001, Time=7.82 sec
 ARIMA(0,1,0)(0,0,0)[26] intercept   : AIC=6644.336, Time=0.02 sec
 ARIMA(1,1,0)(1,0,0)[26] intercept   : AIC=6626.490, Time=0.58 sec
 ARIMA(0,1,1)(0,0,1)[26] intercept   : AIC=6624.707, Time=1.63 sec
 ARIMA(0,1,0)(0,0,0)[26]             : AIC=6642.340, Time=0.02 sec
 ARIMA(2,1,2)(0,0,1)[26] intercept   : AIC=6596.011, Time=5.74 sec
 ARIMA(2,1,2)(0,0,0)[26] intercept   : AIC=6599.863, Time=0.56 sec
 ARIMA(2,1,2)(0,0,2)[26] intercept   : AIC=6597.998, Time=12.40 sec
 ARIMA(2,1,2)(1,0,0)[26] intercept   : AIC=6596.125, Time=6.48 sec
 ARIMA(2,1,2)(1,0,2)[26] intercept   : AIC=6599.995, Time=16.13 sec
 ARIMA(1,1,2)(0,0,1)[26] intercept   : AIC=6600.151, Time=4.46 sec
 ARIMA(2,1,1)(0,0,1)[26] intercept   : AIC=6600.150, Time=5.43 sec
 ARIMA(3,1,2)(0,0,1)[26] intercept   : AIC=6593.821, Time=7.47 sec
 ARIMA(3,1,2)(0,0,0)[26] intercept   : AIC=6598.494, Time=0.66 sec
 ARIMA(3,1,2)(1,0,1)[26] intercept   : AIC=6596.054, Time=7.80 sec
 ARIMA(3,1,2)(0,0,2)[26] intercept   : AIC=6595.598, Time=14.96 sec
 ARIMA(3,1,2)(1,0,0)[26] intercept   : AIC=6593.826, Time=7.24 sec
 ARIMA(3,1,2)(1,0,2)[26] intercept   : AIC=6597.592, Time=17.22 sec
 ARIMA(3,1,1)(0,0,1)[26] intercept   : AIC=6592.376, Time=5.06 sec
 ARIMA(3,1,1)(0,0,0)[26] intercept   : AIC=6597.054, Time=0.65 sec
 ARIMA(3,1,1)(1,0,1)[26] intercept   : AIC=6594.571, Time=7.13 sec
 ARIMA(3,1,1)(0,0,2)[26] intercept   : AIC=6594.334, Time=12.58 sec
 ARIMA(3,1,1)(1,0,0)[26] intercept   : AIC=6592.563, Time=4.84 sec
 ARIMA(3,1,1)(1,0,2)[26] intercept   : AIC=6596.320, Time=14.69 sec
 ARIMA(3,1,0)(0,0,1)[26] intercept   : AIC=6610.714, Time=0.66 sec
 ARIMA(4,1,1)(0,0,1)[26] intercept   : AIC=6593.986, Time=6.41 sec
 ARIMA(2,1,0)(0,0,1)[26] intercept   : AIC=6628.005, Time=0.60 sec
 ARIMA(4,1,0)(0,0,1)[26] intercept   : AIC=6609.412, Time=2.13 sec
 ARIMA(4,1,2)(0,0,1)[26] intercept   : AIC=6594.248, Time=6.99 sec
 ARIMA(3,1,1)(0,0,1)[26]             : AIC=6590.704, Time=2.05 sec
 ARIMA(3,1,1)(0,0,0)[26]             : AIC=6595.176, Time=0.30 sec
 ARIMA(3,1,1)(1,0,1)[26]             : AIC=6592.682, Time=3.51 sec
 ARIMA(3,1,1)(0,0,2)[26]             : AIC=6592.680, Time=5.61 sec
 ARIMA(3,1,1)(1,0,0)[26]             : AIC=6590.857, Time=1.51 sec
 ARIMA(3,1,1)(1,0,2)[26]             : AIC=6594.677, Time=8.03 sec
 ARIMA(2,1,1)(0,0,1)[26]             : AIC=6598.543, Time=2.69 sec
 ARIMA(3,1,0)(0,0,1)[26]             : AIC=6608.758, Time=0.41 sec
 ARIMA(4,1,1)(0,0,1)[26]             : AIC=6592.328, Time=2.25 sec
 ARIMA(3,1,2)(0,0,1)[26]             : AIC=6591.988, Time=2.66 sec
 ARIMA(2,1,0)(0,0,1)[26]             : AIC=6626.029, Time=0.32 sec
 ARIMA(2,1,2)(0,0,1)[26]             : AIC=6594.428, Time=2.21 sec
 ARIMA(4,1,0)(0,0,1)[26]             : AIC=6607.469, Time=1.23 sec
 ARIMA(4,1,2)(0,0,1)[26]             : AIC=6592.588, Time=3.70 sec

Best model:  ARIMA(3,1,1)(0,0,1)[26]          
Total fit time: 214.903 seconds
Optimal SARIMA order: (3, 1, 1)
Optimal Seasonal order: (0, 0, 1, 26)

==================================================
SARIMA MODEL TRAINING
==================================================

SARIMA MODEL SUMMARY
==================================================
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                  505
Model:             SARIMAX(3, 1, 1)x(0, 0, 1, 26)   Log Likelihood               -3114.950
Date:                            Wed, 05 Nov 2025   AIC                           6241.900
Time:                                    22:56:50   BIC                           6266.893
Sample:                                         0   HQIC                          6251.728
                                            - 505                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.5672      0.028     20.138      0.000       0.512       0.622
ar.L2          0.0648      0.048      1.342      0.180      -0.030       0.160
ar.L3         -0.1593      0.041     -3.911      0.000      -0.239      -0.079
ma.L1         -0.8109      0.028    -29.161      0.000      -0.865      -0.756
ma.S.L26      -0.1196      0.018     -6.719      0.000      -0.154      -0.085
sigma2      2.823e+04    611.104     46.201      0.000     2.7e+04    2.94e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):             34527.58
Prob(Q):                              0.99   Prob(JB):                         0.00
Heteroskedasticity (H):              33.72   Skew:                             1.06
Prob(H) (two-sided):                  0.00   Kurtosis:                        44.67
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

ARIMA VALIDATION PERFORMANCE:
========================================
MSE: 428514.3561
RMSE: 654.6101
MAE: 604.0947
MAPE: 0.62%
R²: -5.3783
Directional Accuracy: 31.78%

SARIMA VALIDATION PERFORMANCE:
========================================
MSE: 608951.4528
RMSE: 780.3534
MAE: 731.0257
MAPE: 0.74%
R²: -8.0640
Directional Accuracy: 26.17%

SELECTED BEST MODEL: ARIMA

FINAL ARIMA VALIDATION PERFORMANCE:
========================================
MSE: 658967.5673
RMSE: 811.7682
MAE: 686.5055
MAPE: 0.38%
R²: -2.3151
Directional Accuracy: 31.48%
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
==================================================
FUTURE FORECASTING
==================================================
Future Forecast (Next 12 weeks):
----------------------------------------
Week  1 (2024-11-03): 905.05 Rs./kg
Week  2 (2024-11-10): 905.05 Rs./kg
Week  3 (2024-11-17): 905.05 Rs./kg
Week  4 (2024-11-24): 905.05 Rs./kg
Week  5 (2024-12-01): 905.05 Rs./kg
Week  6 (2024-12-08): 905.05 Rs./kg
Week  7 (2024-12-15): 905.05 Rs./kg
Week  8 (2024-12-22): 905.05 Rs./kg
Week  9 (2024-12-29): 905.05 Rs./kg
Week 10 (2025-01-05): 905.05 Rs./kg
Week 11 (2025-01-12): 905.05 Rs./kg
Week 12 (2025-01-19): 905.05 Rs./kg
No description has been provided for this image
==================================================
MODEL DIAGNOSTICS
==================================================
ARIMA Model Diagnostics:
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
============================================================
FINAL SUMMARY
============================================================
Best Model: ARIMA
Model Order: (3, 1, 1)
Test RMSE: 811.7682
Test MAE: 686.5055
Test MAPE: 0.38%
Test R²: -2.3151
Directional Accuracy: 31.48%

Key Insights:
- ARIMA models are simpler and faster to train
- SARIMA models capture seasonal patterns better
- The best model was selected based on validation performance
- Future forecasts include uncertainty bands based on RMSE
In [11]:
import matplotlib.pyplot as plt

# Use test_dates, test_data, and test_forecast for SARIMA model
plt.figure(figsize=(12, 8))
plt.plot(test_dates, test_data, label='Actual Test', color='blue', linewidth=2)
plt.plot(test_dates, test_forecast, label='SARIMA Predicted', color='orange', linestyle='--', linewidth=2)
plt.title('SARIMA: Actual vs Predicted Cardamom Price (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/sarima_test_vs_predicted.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [2]:
# Create a temporary series for decomposition
train_series = pd.Series(train_data, index=df.index[:train_size])

# Perform seasonal decomposition
# Using a period of 52 for weekly data with yearly seasonality
decomposition = seasonal_decompose(train_series, model='additive', period=26)

# Plot the decomposition
plt.figure(figsize=(12, 8))
plt.subplot(4, 1, 1)
plt.plot(decomposition.observed)
plt.title('Observed')
plt.grid(True)

plt.subplot(4, 1, 2)
plt.plot(decomposition.trend)
plt.title('Trend')
plt.grid(True)

plt.subplot(4, 1, 3)
plt.plot(decomposition.seasonal)
plt.title('Seasonality')
plt.grid(True)

plt.subplot(4, 1, 4)
plt.plot(decomposition.resid)
plt.title('Residuals')
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/arima_result41.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [60]:
# --- Step 9: Final Evaluation on Test Set ---
# Evaluate both models on test set
# First, let's get predictions from both models on the test set

# ARIMA model predictions on test set
arima_full_model = ARIMA(np.concatenate([train_data, val_data]), order=auto_arima_model.order)
arima_full_result = arima_full_model.fit()
arima_test_pred = arima_full_result.forecast(steps=len(test_data))

# SARIMA model predictions on test set
sarima_full_model = SARIMAX(
    np.concatenate([train_data, val_data]),
    order=auto_sarima_model.order,
    seasonal_order=auto_sarima_model.seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False
)
sarima_full_result = sarima_full_model.fit(disp=False)
sarima_test_pred = sarima_full_result.forecast(steps=len(test_data))

# Evaluate both models on test set
arima_test_metrics = evaluate_model(test_data, arima_test_pred, "ARIMA TEST")
sarima_test_metrics = evaluate_model(test_data, sarima_test_pred, "SARIMA TEST")

# Select the best model based on test performance
if arima_test_metrics['RMSE'] < sarima_test_metrics['RMSE']:
    print("\nSELECTED BEST MODEL: ARIMA")
    best_model = arima_full_result
    best_model_name = "ARIMA"
    best_order = auto_arima_model.order
    test_forecast = arima_test_pred
else:
    print("\nSELECTED BEST MODEL: SARIMA")
    best_model = sarima_full_result
    best_model_name = "SARIMA"
    best_order = f"{auto_sarima_model.order}{auto_sarima_model.seasonal_order}"
    test_forecast = sarima_test_pred

final_metrics = evaluate_model(test_data, test_forecast, f"FINAL {best_model_name}")

# Create a comparison table of metrics
metrics_df = pd.DataFrame({
    'ARIMA Validation': arima_metrics,
    'SARIMA Validation': sarima_metrics,
    'ARIMA Test': arima_test_metrics,
    'SARIMA Test': sarima_test_metrics,
    'Final Model': final_metrics
}).T

print("\n" + "="*60)
print("COMPREHENSIVE MODEL COMPARISON")
print("="*60)
print(metrics_df.round(4))

# --- Step 10: Visualization ---
# Create date indices for plotting
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:train_size+val_size+test_size]

# Create separate figures for ARIMA and SARIMA actual vs predicted
plt.figure(figsize=(15, 10))

# ARIMA Actual vs Predicted (Test Set)
plt.subplot(2, 2, 1)
plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, arima_test_pred, label='ARIMA Predicted', color='red', linestyle='--', linewidth=2)
plt.title('ARIMA: Actual vs Predicted (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)

# ARIMA Scatter Plot (Test Set)
plt.subplot(2, 2, 2)
plt.scatter(test_data, arima_test_pred, alpha=0.6)
min_val = min(test_data.min(), arima_test_pred.min())
max_val = max(test_data.max(), arima_test_pred.max())
plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect Prediction')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('ARIMA: Actual vs Predicted Scatter Plot (Test Set)')
plt.legend()
plt.grid(True)

# SARIMA Actual vs Predicted (Test Set)
plt.subplot(2, 2, 3)
plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, sarima_test_pred, label='SARIMA Predicted', color='green', linestyle='--', linewidth=2)
plt.title('SARIMA: Actual vs Predicted (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)

# SARIMA Scatter Plot (Test Set)
plt.subplot(2, 2, 4)
plt.scatter(test_data, sarima_test_pred, alpha=0.6)
min_val = min(test_data.min(), sarima_test_pred.min())
max_val = max(test_data.max(), sarima_test_pred.max())
plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect Prediction')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('SARIMA: Actual vs Predicted Scatter Plot (Test Set)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

# Additional visualization showing the complete picture
plt.figure(figsize=(20, 15))

# Plot 1: ARIMA Results
plt.subplot(3, 2, 1)
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, arima_val_pred, label='ARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'ARIMA Model: Order {auto_arima_model.order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)

# Plot 2: SARIMA Results
plt.subplot(3, 2, 2)
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, sarima_val_pred, label='SARIMA Forecast', color='orange', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'SARIMA Model: Order {auto_sarima_model.order}{auto_sarima_model.seasonal_order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)

# Plot 3: Final Test Results
plt.subplot(3, 2, 3)
plt.plot(df.index[:train_size+val_size], np.concatenate([train_data, val_data]), label='Train+Validation', color='blue', alpha=0.7)
plt.plot(test_dates, test_data, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Final {best_model_name} Model: Test Performance')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)

# Plot 4: Separate Actual vs Predicted (Test)
plt.subplot(3, 2, 4)
plt.plot(test_dates, test_data, label='Actual Test', color='blue', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates, 
                 test_forecast - final_metrics['RMSE'], 
                 test_forecast + final_metrics['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title(f'Actual vs Predicted - {best_model_name} (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)

# Plot 5: Model Comparison (Test Set)
plt.subplot(3, 2, 5)
models = ['ARIMA', 'SARIMA']
rmse_values = [arima_test_metrics['RMSE'], sarima_test_metrics['RMSE']]
mae_values = [arima_test_metrics['MAE'], sarima_test_metrics['MAE']]

x = np.arange(len(models))
width = 0.35

plt.bar(x - width/2, rmse_values, width, label='RMSE', alpha=0.8)
plt.bar(x + width/2, mae_values, width, label='MAE', alpha=0.8)
plt.xlabel('Models')
plt.ylabel('Error Values')
plt.title('Model Comparison (Test Set)')
plt.xticks(x, models)
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 6: Residuals Analysis
residuals = test_data - test_forecast
plt.subplot(3, 2, 6)
plt.plot(test_dates, residuals, color='purple', alpha=0.7)
plt.axhline(0, color='red', linestyle='--', linewidth=2)
plt.title(f'{best_model_name} Model Residuals (Test Set)')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.grid(True)
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# --- Step 11: Future Forecasting ---
print("\n" + "="*50)
print("FUTURE FORECASTING")
print("="*50)

# Forecast next 12 periods
future_steps = 12
future_forecast = best_model.forecast(steps=len(test_data) + future_steps)
future_forecast = future_forecast[-future_steps:]  # Get only future values

# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=future_steps, freq='W')

print(f"Future Forecast (Next {future_steps} weeks):")
print("-" * 40)
for i, (date, price) in enumerate(zip(future_dates, future_forecast), 1):
    print(f"Week {i:2d} ({date.strftime('%Y-%m-%d')}): {price:.2f} Rs./kg")

# Plot future forecast
plt.figure(figsize=(12, 6))
# Plot historical data
historical_dates = df.index[-100:]  # Last 100 points
historical_data = data[-100:]
plt.plot(historical_dates, historical_data, label='Historical Data', color='blue', linewidth=2)

# Plot future forecast
plt.plot(future_dates, future_forecast, label='Future Forecast', color='red', linestyle='--', linewidth=2)
plt.fill_between(future_dates, 
                 future_forecast - final_metrics['RMSE'], 
                 future_forecast + final_metrics['RMSE'], 
                 alpha=0.2, color='red', label='Uncertainty Band')

plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title(f'{best_model_name} Future Price Forecast (Next {future_steps} weeks)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# --- Step 12: Model Diagnostics ---
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)

# Plot diagnostics for the best model
if best_model_name == "ARIMA":
    print("ARIMA Model Diagnostics:")
    # ARIMA diagnostics
    residuals = best_model.resid
    plt.figure(figsize=(12, 8))
    plt.subplot(2, 2, 1)
    plt.plot(residuals)
    plt.title('ARIMA Model Residuals')
    plt.grid(True)
    
    plt.subplot(2, 2, 2)
    plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
    plt.title('Residual Distribution')
    plt.grid(True)
    
    plt.subplot(2, 2, 3)
    plt.acorr(residuals, maxlags=20)
    plt.title('Residual Autocorrelation')
    plt.grid(True)
    
    plt.tight_layout()
    plt.show()
    
else:
    print("SARIMA Model Diagnostics:")
    # Plot SARIMA diagnostics
    best_model.plot_diagnostics(figsize=(12, 8))
    plt.suptitle('SARIMA Model Diagnostics', y=1.02)
    plt.tight_layout()
    plt.show()

# --- Final Summary ---
print("\n" + "="*60)
print("FINAL SUMMARY")
print("="*60)
print(f"Best Model: {best_model_name}")
print(f"Model Order: {best_order}")
print(f"Test RMSE: {final_metrics['RMSE']:.4f}")
print(f"Test MAE: {final_metrics['MAE']:.4f}")
print(f"Test MAPE: {final_metrics['MAPE']:.2f}%")
print(f"Test R²: {final_metrics['R²']:.4f}")
print(f"Directional Accuracy: {final_metrics['DA']:.2f}%")

print("\nKey Insights:")
print("- ARIMA models are simpler and faster to train")
print("- SARIMA models capture seasonal patterns better")
print("- The best model was selected based on test performance")
print("- Future forecasts include uncertainty bands based on RMSE")
ARIMA TEST VALIDATION PERFORMANCE:
========================================
MSE: 658967.5673
RMSE: 811.7682
MAE: 686.5055
MAPE: 0.38%
R²: -2.3151
Directional Accuracy: 31.48%

SARIMA TEST VALIDATION PERFORMANCE:
========================================
MSE: 655872.2759
RMSE: 809.8594
MAE: 685.7176
MAPE: 0.38%
R²: -2.2995
Directional Accuracy: 33.33%

SELECTED BEST MODEL: SARIMA

FINAL SARIMA VALIDATION PERFORMANCE:
========================================
MSE: 655872.2759
RMSE: 809.8594
MAE: 685.7176
MAPE: 0.38%
R²: -2.2995
Directional Accuracy: 33.33%

============================================================
COMPREHENSIVE MODEL COMPARISON
============================================================
                           MSE      RMSE       MAE    MAPE      R²       DA
ARIMA Validation   428514.3561  654.6101  604.0947  0.6185 -5.3783  31.7757
SARIMA Validation  608951.4528  780.3534  731.0257  0.7416 -8.0640  26.1682
ARIMA Test         658967.5673  811.7682  686.5055  0.3810 -2.3151  31.4815
SARIMA Test        655872.2759  809.8594  685.7176  0.3812 -2.2995  33.3333
Final Model        655872.2759  809.8594  685.7176  0.3812 -2.2995  33.3333
No description has been provided for this image
No description has been provided for this image
==================================================
FUTURE FORECASTING
==================================================
Future Forecast (Next 12 weeks):
----------------------------------------
Week  1 (2024-11-03): 907.44 Rs./kg
Week  2 (2024-11-10): 907.44 Rs./kg
Week  3 (2024-11-17): 907.44 Rs./kg
Week  4 (2024-11-24): 907.44 Rs./kg
Week  5 (2024-12-01): 907.44 Rs./kg
Week  6 (2024-12-08): 907.44 Rs./kg
Week  7 (2024-12-15): 907.44 Rs./kg
Week  8 (2024-12-22): 907.44 Rs./kg
Week  9 (2024-12-29): 907.44 Rs./kg
Week 10 (2025-01-05): 907.44 Rs./kg
Week 11 (2025-01-12): 907.44 Rs./kg
Week 12 (2025-01-19): 907.44 Rs./kg
No description has been provided for this image
==================================================
MODEL DIAGNOSTICS
==================================================
SARIMA Model Diagnostics:
No description has been provided for this image
============================================================
FINAL SUMMARY
============================================================
Best Model: SARIMA
Model Order: (3, 1, 1)(0, 0, 1, 26)
Test RMSE: 809.8594
Test MAE: 685.7176
Test MAPE: 0.38%
Test R²: -2.2995
Directional Accuracy: 33.33%

Key Insights:
- ARIMA models are simpler and faster to train
- SARIMA models capture seasonal patterns better
- The best model was selected based on test performance
- Future forecasts include uncertainty bands based on RMSE
In [70]:
# --- Step 10: Visualization ---
# Create date indices for plotting
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:train_size+val_size+test_size]






# Final Test Results
plt.figure(figsize=(16, 6))

# ARIMA Actual vs Predicted (Test Set)

plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, arima_test_pred, label='ARIMA Predicted', color='red', linestyle='--', linewidth=2)
plt.title('ARIMA: Actual vs Predicted (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/arima_result15.png", dpi=300, bbox_inches='tight')
plt.show()
Out[70]:
(array([19266., 19358., 19448., 19539., 19631., 19723., 19814., 19905.,
        19997.]),
 [Text(19266.0, 0, '2022-10'),
  Text(19358.0, 0, '2023-01'),
  Text(19448.0, 0, '2023-04'),
  Text(19539.0, 0, '2023-07'),
  Text(19631.0, 0, '2023-10'),
  Text(19723.0, 0, '2024-01'),
  Text(19814.0, 0, '2024-04'),
  Text(19905.0, 0, '2024-07'),
  Text(19997.0, 0, '2024-10')])
No description has been provided for this image
In [86]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress barimport numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress bar

1. Load and Normalize Data¶

In [89]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [90]:
np.random.seed(0)
df[' Date'] = pd.to_datetime(df[' Date'])
df.set_index(' Date', inplace=True)
df = df.sort_index()
df.head()
Out[90]:
State Name District Name Market Name Variety Group Arrivals (Tonnes) Min Price (Rs./Quintal) Max Price (Rs./Quintal) Modal Price (Rs./Quintal)
Date
2011-01-16 Kerala Idukki Nedumkandam Other Spices 14.0 120000 150000 130000
2011-01-23 Kerala Idukki Nedumkandam Other Spices 17.0 120000 150000 140000
2011-01-30 Kerala Idukki Nedumkandam Other Spices 12.0 120000 150000 130000
2011-02-06 Kerala Idukki Nedumkandam Other Spices 8.5 120000 150000 125000
2011-02-13 Kerala Idukki Nedumkandam Other Spices 9.2 100000 115000 107500

--------------- 1. Remove Outliers ---------------¶

In [92]:
def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    filtered_df = df[(df[column] >= (Q1 - 1.5 * IQR)) & (df[column] <= (Q3 + 1.5 * IQR))]
    return filtered_df
In [93]:
df_clean = remove_outliers(df, 'Modal Price (Rs./Quintal)')

--------------- 2. Check Stationarity ---------------¶

In [95]:
def check_stationarity(df, column):
    result = adfuller(df[column])
    print(f"ADF Statistic: {result[0]}")
    print(f"p-value: {result[1]}")
    if result[1] > 0.05:
        print("Series is not stationary. Differencing will be applied.")
    else:
        print("Series is stationary.")

check_stationarity(df_clean, 'Modal Price (Rs./Quintal)')
ADF Statistic: -2.1259554387544832
p-value: 0.23420666194205048
Series is not stationary. Differencing will be applied.

--------------- 3. Differencing (if needed) ---------------¶

In [97]:
df_clean['Differenced'] = df_clean['Modal Price (Rs./Quintal)'].diff().dropna()
In [99]:
check_stationarity(df_clean.dropna(), 'Differenced')
ADF Statistic: -14.327533178188892
p-value: 1.1140991886231331e-26
Series is stationary.

--------------- 5. ACF and PACF Plots ---------------¶

In [103]:
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
plot_acf(df_clean['Differenced'].dropna(), ax=axes[0], lags=40)
axes[0].set_title("ACF - Differenced Series")
plot_pacf(df_clean['Differenced'].dropna(), ax=axes[1], lags=40, method='ywm')
axes[1].set_title("PACF - Differenced Series")
plt.tight_layout()
plt.show()
No description has been provided for this image

--------------- 6. Train-Validation-Test Split ---------------¶

In [108]:
df_clean['Modal Price (Rs./kg)'] = df_clean['Modal Price (Rs./Quintal)']/100
In [110]:
train_size = int(len(df_clean) * 0.7)
val_size = int(len(df_clean) * 0.15)

train = df_clean['Modal Price (Rs./kg)'].iloc[:train_size]
val = df_clean['Modal Price (Rs./kg)'].iloc[train_size:train_size + val_size]
test = df_clean['Modal Price (Rs./kg)'].iloc[train_size + val_size:]

----------------- 7. Time Series Cross-Validation Function (for ARIMA) -----------------¶

In [113]:
def arima_cv(train_series, val_series, p_values, d_values, q_values):

    best_score, best_cfg = float("inf"), None
    results = []
    for p in tqdm(p_values, desc="p loop"):
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                
                # Skip models with p=0 and q=0 (no AR or MA terms)
                if (p == 0 and q == 0):
                    continue
                
                try:
                    model = SARIMAX(train_series,
                                    order=order,
                                    seasonal_order=(0,0,0,0), # No seasonality
                                    enforce_stationarity=False,
                                    enforce_invertibility=False)
                    model_fit = model.fit(disp=False)
                    
                    forecast = model_fit.forecast(steps=len(val_series))
                    rmse = np.sqrt(mean_squared_error(val_series, forecast))
                    results.append((order, rmse))
                    
                    if rmse < best_score:
                        best_score = rmse
                        best_cfg = order
                        
                except Exception as e:
                    # Optional: print(e)
                    continue
                    
    return best_cfg, results

--------------- 8. Find the best ARIMA parameters --------------¶

In [142]:
import pandas as pd
import numpy as np
import pmdarima as pm
from pmdarima import auto_arima
import warnings
warnings.filterwarnings('ignore')

# Load your data
df = pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy\kattapana.xlsx", parse_dates=True)

# Assuming you have a time series column (e.g., 'Modal Price')
# Make sure the data is sorted by date and set date as index if needed
# df = df.set_index('Date_column_name')

# Select your time series data
# Replace 'Modal Price (Rs./Quintal)' with your actual column name
ts_data = df['Modal Price (Rs./Quintal)'].dropna()

print("🔍 Running auto_arima to find optimal ARIMA parameters...")

# Use auto_arima to find the best model
model = auto_arima(ts_data,
                   seasonal=False,        # Non-seasonal data
                   stationary=False,      # Check for stationarity
                   start_p=0,            # Minimum p value
                   max_p=8,              # Maximum p value
                   start_q=0,            # Minimum q value
                   max_q=8,              # Maximum q value
                   d=None,               # Let model determine differencing
                   test='adf',           # Augmented Dickey-Fuller test for stationarity
                   trace=True,           # Show progress
                   error_action='ignore', # Ignore invalid orders
                   suppress_warnings=True,
                   stepwise=True,        # Use stepwise algorithm for faster computation
                   information_criterion='aic'  # Use AIC for model selection
                  )

print(f"\n✅ Best ARIMA Model: {model.order}")
print(f"📊 AIC: {model.aic():.2f}")
print(f"📊 BIC: {model.bic():.2}")

# Fit the best model
best_model = model

# Summary of the best model
print("\n" + "="*50)
print("BEST MODEL SUMMARY:")
print("="*50)
print(best_model.summary())

# Get model parameters
best_cfg = best_model.order
print(f"\n🎯 Optimal ARIMA Order: {best_cfg}")

# Forecast using the best model (example: 10 steps ahead)
forecast_steps = 10
forecast, conf_int = best_model.predict(n_periods=forecast_steps, return_conf_int=True)

print(f"\n📈 {forecast_steps}-step Forecast:")
for i, (point, (lower, upper)) in enumerate(zip(forecast, conf_int)):
    print(f"Step {i+1}: ₹{point:.2f} (95% CI: ₹{lower:.2f} - ₹{upper:.2f})")
🔍 Running auto_arima to find optimal ARIMA parameters...
Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=11425.007, Time=0.05 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=11421.657, Time=0.09 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=11422.560, Time=0.08 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=11423.133, Time=0.02 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=11418.139, Time=0.12 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=11419.943, Time=0.13 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=11419.656, Time=0.28 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=11418.345, Time=0.19 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=11421.498, Time=0.43 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=11416.196, Time=0.09 sec
 ARIMA(1,1,0)(0,0,0)[0]             : AIC=11419.745, Time=0.05 sec
 ARIMA(3,1,0)(0,0,0)[0]             : AIC=11417.995, Time=0.13 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=11417.595, Time=0.23 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=11416.122, Time=0.17 sec
 ARIMA(0,1,1)(0,0,0)[0]             : AIC=11420.658, Time=0.06 sec
 ARIMA(1,1,2)(0,0,0)[0]             : AIC=11417.690, Time=0.29 sec
 ARIMA(0,1,2)(0,0,0)[0]             : AIC=11416.954, Time=0.11 sec
 ARIMA(2,1,2)(0,0,0)[0]             : AIC=11413.047, Time=0.52 sec
 ARIMA(3,1,2)(0,0,0)[0]             : AIC=11414.632, Time=0.80 sec
 ARIMA(2,1,3)(0,0,0)[0]             : AIC=11414.389, Time=0.94 sec
 ARIMA(1,1,3)(0,0,0)[0]             : AIC=11419.480, Time=0.47 sec
 ARIMA(3,1,1)(0,0,0)[0]             : AIC=11419.343, Time=0.48 sec
 ARIMA(3,1,3)(0,0,0)[0]             : AIC=inf, Time=1.42 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=11415.012, Time=0.59 sec

Best model:  ARIMA(2,1,2)(0,0,0)[0]          
Total fit time: 7.797 seconds

✅ Best ARIMA Model: (2, 1, 2)
📊 AIC: 11413.05
📊 BIC: 1.1e+04

==================================================
BEST MODEL SUMMARY:
==================================================
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  543
Model:               SARIMAX(2, 1, 2)   Log Likelihood               -5701.524
Date:                Fri, 22 Aug 2025   AIC                          11413.047
Time:                        11:44:13   BIC                          11434.523
Sample:                             0   HQIC                         11421.445
                                - 543                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2861      0.120     -2.385      0.017      -0.521      -0.051
ar.L2          0.5864      0.106      5.531      0.000       0.379       0.794
ma.L1          0.3700      0.128      2.895      0.004       0.120       0.620
ma.L2         -0.4451      0.113     -3.930      0.000      -0.667      -0.223
sigma2      8.203e+07   2.15e-09   3.82e+16      0.000     8.2e+07     8.2e+07
===================================================================================
Ljung-Box (L1) (Q):                   0.16   Jarque-Bera (JB):              4753.21
Prob(Q):                              0.69   Prob(JB):                         0.00
Heteroskedasticity (H):               1.41   Skew:                             0.60
Prob(H) (two-sided):                  0.02   Kurtosis:                        17.46
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 6.63e+31. Standard errors may be unstable.

🎯 Optimal ARIMA Order: (2, 1, 2)

📈 10-step Forecast:
Step 1: ₹130178.66 (95% CI: ₹112426.80 - ₹147930.51)
Step 2: ₹130433.51 (95% CI: ₹104254.00 - ₹156613.03)
Step 3: ₹130465.37 (95% CI: ₹96700.31 - ₹164230.43)
Step 4: ₹130605.70 (95% CI: ₹90521.80 - ₹170689.60)
Step 5: ₹130584.24 (95% CI: ₹84497.53 - ₹176670.95)
Step 6: ₹130672.67 (95% CI: ₹79351.79 - ₹181993.55)
Step 7: ₹130634.79 (95% CI: ₹74273.97 - ₹186995.60)
Step 8: ₹130697.48 (95% CI: ₹69826.13 - ₹191568.83)
Step 9: ₹130657.33 (95% CI: ₹65406.79 - ₹195907.87)
Step 10: ₹130705.58 (95% CI: ₹61460.00 - ₹199951.16)

--------------- 9. Fit Best Model ---------------¶

In [144]:
best_model = SARIMAX(train, 
                     order=best_cfg,
                     seasonal_order=(0,0,0,0), # No seasonality
                     enforce_stationarity=False,
                     enforce_invertibility=False)
best_result = best_model.fit(disp=False)
In [146]:
print(best_result.summary())
                                SARIMAX Results                                 
================================================================================
Dep. Variable:     Modal Price (Rs./kg)   No. Observations:                  373
Model:                 SARIMAX(2, 1, 2)   Log Likelihood               -2185.967
Date:                  Fri, 22 Aug 2025   AIC                           4381.933
Time:                          11:44:44   BIC                           4401.487
Sample:                               0   HQIC                          4389.701
                                  - 373                                         
Covariance Type:                    opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          1.1512      0.316      3.647      0.000       0.533       1.770
ar.L2         -0.4004      0.219     -1.832      0.067      -0.829       0.028
ma.L1         -1.3181      0.326     -4.045      0.000      -1.957      -0.679
ma.L2          0.3979      0.298      1.335      0.182      -0.186       0.982
sigma2      8153.8917    164.324     49.621      0.000    7831.822    8475.962
===================================================================================
Ljung-Box (L1) (Q):                   0.05   Jarque-Bera (JB):             11811.78
Prob(Q):                              0.82   Prob(JB):                         0.00
Heteroskedasticity (H):               2.03   Skew:                             2.43
Prob(H) (two-sided):                  0.00   Kurtosis:                        30.29
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

--------------- 10. Forecast on Test Set ---------------¶

In [149]:
forecast = best_result.forecast(steps=len(test))

9. Create indices for plotting¶

In [152]:
train_idx = df_clean.iloc[:len(train)].index
val_idx = df_clean.iloc[len(train):len(train)+len(val)].index
test_idx = df_clean.iloc[len(train)+len(val):len(train)+len(val)+len(test)].index

11. Evaluation Metrics¶

In [155]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def safe_mape(y_true, y_pred):
    mask = y_true != 0
    return (np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])).mean() * 100

def directional_accuracy(y_true, y_pred):
    true_direction = np.sign(np.diff(y_true))
    pred_direction = np.sign(np.diff(y_pred))
    return np.mean(true_direction == pred_direction) * 100

# Compute metrics
rmse = np.sqrt(mean_squared_error(test, forecast))
mae = mean_absolute_error(test, forecast)
mape = safe_mape(test.values, forecast.values)
r2 = r2_score(test, forecast)
da = directional_accuracy(test.values, forecast.values)

# Print all metrics
print(f"✅ RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}, Directional Accuracy: {da:.2f}%")
✅ RMSE: 206.9358, MAE: 183.2453, MAPE: 17.86%, R²: -0.3084, Directional Accuracy: 22.50%
In [157]:
results_df
Out[157]:
Date Actual Predicted
0 2020-10-28 1500.0 1667.237235
1 2020-11-25 1500.0 1637.548541
2 2020-12-04 1500.0 1610.781425
3 2020-12-11 1500.0 1586.648382
4 2020-12-26 1600.0 1564.890201
... ... ... ...
90 2023-08-21 2000.0 1365.571500
91 2023-08-22 1900.0 1365.568849
92 2024-02-29 1350.0 1365.566458
93 2024-03-03 1300.0 1365.564302
94 2024-08-27 2200.0 1365.562359

95 rows × 3 columns

In [158]:
results_df = pd.DataFrame({
    'Index': test.index,
    'Test_Values': test.values.flatten(),
    'Predicted_Values': forecast.values.flatten()
})
results_df.to_csv('fixed_predictions.tsv', index=False, sep='\t')

10. Plotting actual vs predicted values (No scaling, so use directly)¶

In [162]:
plt.figure(figsize=(15,7))
plt.plot(train_idx, train, label='Train', color='blue')
plt.plot(val_idx, val, label='Validation', color='green')
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Price (Rs./Quintal)')
plt.title('ARIMA Model Forecast: Train, Validation, Test and Predictions')
plt.legend()
plt.grid(True)
plt.show()
No description has been provided for this image
In [163]:
plt.figure(figsize=(15,7))
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
Out[163]:
[<matplotlib.lines.Line2D at 0x1dce6b4a780>]
No description has been provided for this image

13. Diagnostic Plots¶

In [167]:
best_result.plot_diagnostics(figsize=(15, 12))
plt.show()
No description has been provided for this image

--- SARIMA---¶

--- Import libraries for SARIMA----¶

In [155]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress barimport numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress bar

1. Load and Normalize Data¶

In [157]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [158]:
np.random.seed(0)
df[' Date'] = pd.to_datetime(df[' Date'])
df.set_index(' Date', inplace=True)
df = df.sort_index()
df.head()
Out[158]:
State Name District Name Market Name Variety Group Arrivals (Tonnes) Min Price (Rs./Quintal) Max Price (Rs./Quintal) Modal Price (Rs./Quintal)
Date
2011-01-16 Kerala Idukki Nedumkandam Other Spices 14.0 120000 150000 130000
2011-01-23 Kerala Idukki Nedumkandam Other Spices 17.0 120000 150000 140000
2011-01-30 Kerala Idukki Nedumkandam Other Spices 12.0 120000 150000 130000
2011-02-06 Kerala Idukki Nedumkandam Other Spices 8.5 120000 150000 125000
2011-02-13 Kerala Idukki Nedumkandam Other Spices 9.2 100000 115000 107500
In [159]:
# --------------- 4. Seasonality Decomposition (Multiplicative) ---------------
# Assuming your data is weekly and yearly seasonality (period=52 weeks)
result = seasonal_decompose(df['Modal Price (Rs./Quintal)'], model='multiplicative', period=13)
fig = result.plot()
fig.set_size_inches(16, 9)
plt.suptitle("Seasonal Decomposition of Modal Price (Multiplicative Model)", fontsize=18)
plt.show()
No description has been provided for this image

1. Remove Outliers (e.g., using IQR method)¶

In [161]:
def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    filtered_df = df[(df[column] >= (Q1 - 1.5 * IQR)) & (df[column] <= (Q3 + 1.5 * IQR))]
    return filtered_df

Assuming 'Modal Price (Rs./Quintal)' is the target column¶

In [163]:
df_clean = remove_outliers(df, 'Modal Price (Rs./Quintal)')

2. Check for Stationarity (ADF Test)¶

In [167]:
def check_stationarity(df, column):
    result = adfuller(df[column])
    print(f"ADF Statistic: {result[0]}")
    print(f"p-value: {result[1]}")
    if result[1] > 0.05:
        print("Series is not stationary. Differencing will be applied.")
    else:
        print("Series is stationary.")

check_stationarity(df_clean, 'Modal Price (Rs./Quintal)')
ADF Statistic: -2.1259554387544832
p-value: 0.23420666194205048
Series is not stationary. Differencing will be applied.

Apply differencing if the series is not stationary¶

In [170]:
df_clean['Differenced'] = df_clean['Modal Price (Rs./Quintal)'].diff().dropna()

Check again if differencing made it stationary¶

In [172]:
check_stationarity(df_clean.dropna(), 'Differenced')
ADF Statistic: -14.327533178188892
p-value: 1.1140991886231331e-26
Series is stationary.

4. ACF and PACF Plots to determine p, q, P, Q for SARIMA¶

In [174]:
plot_acf(df_clean['Differenced'].dropna())
plt.title("ACF Plot")
plt.show()
No description has been provided for this image
In [175]:
plot_pacf(df_clean['Differenced'].dropna())
plt.title("PACF Plot")
plt.show()
No description has been provided for this image

--------------- 4. Seasonality Decomposition (Multiplicative) ---------------¶

Assuming your data is weekly and yearly seasonality (period=13 weeks)¶

In [177]:
result = seasonal_decompose(df_clean['Modal Price (Rs./Quintal)'], model='multiplicative', period=13)
fig = result.plot()
fig.set_size_inches(16, 9)
plt.suptitle("Seasonal Decomposition of Modal Price (Multiplicative Model)", fontsize=18)
plt.show()
No description has been provided for this image

5. Split the data into Train, Validation, and Test¶

In [179]:
df_clean['Price (Rs./kg)'] = df_clean['Modal Price (Rs./Quintal)'] / 100
In [181]:
train_size = int(len(df_clean) * 0.7)
val_size = int(len(df_clean) * 0.15)

train = df_clean['Price (Rs./kg)'].iloc[:train_size]
val = df_clean['Price (Rs./kg)'].iloc[train_size:train_size + val_size]
test = df_clean['Price (Rs./kg)'].iloc[train_size + val_size:]

----------------- 7. Time Series Cross-Validation Function (for SARIMA) -----------------¶

In [193]:
def sarima_cv(train_series, p_values, d_values, q_values, P_values, D_values, Q_values, m):
    best_score, best_cfg = float("inf"), None
    results = []
    for p in tqdm(p_values, desc="p loop"):
        for d in d_values:
            for q in q_values:
                for P in P_values:
                    for D in D_values:
                        for Q in Q_values:
                            if (p + q + P + Q) == 0:  # avoid no model
                                continue
                            order = (p, d, q)
                            seasonal_order = (P, D, Q, m)
                            try:
                                model = SARIMAX(train_series,
                                                order=order,
                                                seasonal_order=seasonal_order,
                                                enforce_stationarity=False,
                                                enforce_invertibility=False)
                                model_fit = model.fit(disp=False)
                                
                                aic = model_fit.aic  # use AIC if needed
                                #forecast = model_fit.forecast(steps=len(val))
                                #rmse = np.sqrt(mean_squared_error(val, forecast))
                                results.append((order, seasonal_order, aic))
                                
                                if aic < best_score:
                                    best_score = aic
                                    best_cfg = (order, seasonal_order)
                            except:
                                continue
    return best_cfg, results

--------------- 8. Find the best SARIMA parameters ---------------¶

In [196]:
p_values = range(0, 5)
d_values = range(0, 1)
q_values = range(0, 5)
P_values = range(0, 3)
D_values = range(0, 1)
Q_values = range(0, 3)
m = 26 # weekly seasonality
In [198]:
# Alternative with automatic seasonal period detection
model = auto_arima(ts_data,
                   seasonal=True,
                   m=26,                 # Known seasonal period (26 weeks)
                   # m='auto',           # Uncomment to let pmdarima detect seasonal period
                   start_p=0, max_p=6,
                   start_q=0, max_q=6,
                   start_P=0, max_P=3,
                   start_Q=0, max_Q=3,
                   max_d=2,              # Maximum regular differencing
                   max_D=1,              # Maximum seasonal differencing
                   test='adf',
                   trace=True,
                   error_action='warn',  # Warn instead of ignore errors
                   suppress_warnings=False,
                   stepwise=True,
                   information_criterion='aic',
                   n_jobs=-1,            # Use all CPU cores
                   n_fits=30             # Number of models to try
                  )

# Check if seasonal component is significant
if model.seasonal_order[1] > 0 or model.seasonal_order[3] > 0:
    print("✅ Significant seasonal pattern detected")
else:
    print("⚠️  No significant seasonal pattern found")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[198], line 2
      1 # Alternative with automatic seasonal period detection
----> 2 model = auto_arima(ts_data,
      3                    seasonal=True,
      4                    m=26,                 # Known seasonal period (26 weeks)
      5                    # m='auto',           # Uncomment to let pmdarima detect seasonal period
      6                    start_p=0, max_p=6,
      7                    start_q=0, max_q=6,
      8                    start_P=0, max_P=3,
      9                    start_Q=0, max_Q=3,
     10                    max_d=2,              # Maximum regular differencing
     11                    max_D=1,              # Maximum seasonal differencing
     12                    test='adf',
     13                    trace=True,
     14                    error_action='warn',  # Warn instead of ignore errors
     15                    suppress_warnings=False,
     16                    stepwise=True,
     17                    information_criterion='aic',
     18                    n_jobs=-1,            # Use all CPU cores
     19                    n_fits=30             # Number of models to try
     20                   )
     22 # Check if seasonal component is significant
     23 if model.seasonal_order[1] > 0 or model.seasonal_order[3] > 0:

NameError: name 'ts_data' is not defined

--------------- 9. Fit Best Model ---------------¶

In [221]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Assuming best_cfg is in the format: ((p,d,q), (P,D,Q,m))
best_cfg = ((2, 1, 2), (0, 0, 0, 26))

# Correct way to pass the parameters
best_model = SARIMAX(train, 
                     order=best_cfg[0],      # This should be (p,d,q) tuple
                     seasonal_order=best_cfg[1],  # This should be (P,D,Q,m) tuple
                     enforce_stationarity=False,
                     enforce_invertibility=False)

# Fit the model (only need to fit once)
best_result = best_model.fit(disp=False)

print("✅ SARIMA model fitted successfully!")
print(f"Model order: {best_cfg[0]}")
print(f"Seasonal order: {best_cfg[1]}")
✅ SARIMA model fitted successfully!
Model order: (2, 1, 2)
Seasonal order: (0, 0, 0, 26)
In [225]:
print(best_result.summary())
                               SARIMAX Results                                
==============================================================================
Dep. Variable:         Price (Rs./kg)   No. Observations:                  373
Model:               SARIMAX(2, 1, 2)   Log Likelihood               -2185.967
Date:                Fri, 22 Aug 2025   AIC                           4381.933
Time:                        11:47:09   BIC                           4401.487
Sample:                             0   HQIC                          4389.701
                                - 373                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          1.1512      0.316      3.647      0.000       0.533       1.770
ar.L2         -0.4004      0.219     -1.832      0.067      -0.829       0.028
ma.L1         -1.3181      0.326     -4.045      0.000      -1.957      -0.679
ma.L2          0.3979      0.298      1.335      0.182      -0.186       0.982
sigma2      8153.8917    164.324     49.621      0.000    7831.822    8475.962
===================================================================================
Ljung-Box (L1) (Q):                   0.05   Jarque-Bera (JB):             11811.78
Prob(Q):                              0.82   Prob(JB):                         0.00
Heteroskedasticity (H):               2.03   Skew:                             2.43
Prob(H) (two-sided):                  0.00   Kurtosis:                        30.29
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

--------------- 10. Forecast on Test Set ---------------¶

In [230]:
forecast = best_result.forecast(steps=len(test))

9. Create indices for plotting¶

In [233]:
train_idx = df_clean.iloc[:len(train)].index
val_idx = df_clean.iloc[len(train):len(train)+len(val)].index
test_idx = df_clean.iloc[len(train)+len(val):len(train)+len(val)+len(test)].index
In [235]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def safe_mape(y_true, y_pred):
    mask = y_true != 0
    return (np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])).mean() * 100

def directional_accuracy(y_true, y_pred):
    true_direction = np.sign(np.diff(y_true))
    pred_direction = np.sign(np.diff(y_pred))
    return np.mean(true_direction == pred_direction) * 100

# Compute metrics
rmse = np.sqrt(mean_squared_error(test, forecast))
mae = mean_absolute_error(test, forecast)
mape = safe_mape(test.values, forecast.values)
r2 = r2_score(test, forecast)
da = directional_accuracy(test.values, forecast.values)

# Print all metrics
print(f"✅ RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}, Directional Accuracy: {da:.2f}%")
✅ RMSE: 206.9358, MAE: 183.2453, MAPE: 17.86%, R²: -0.3084, Directional Accuracy: 22.50%
In [125]:
# Create comparison DataFrame
results_df = pd.DataFrame({
    'Date': test.index,          # Assuming test has datetime index
    'Test_Values': test.values.flatten(),
    'Predicted_Values': forecast.values.flatten()
})

# Print first 20 rows for quick inspection
print("\nTest vs Predicted Values (first 20 rows):")
print(results_df.head(20).to_string(index=False))

# Save as TSV (tab-separated) for Excel
results_df.to_csv('test_vs_predicted.tsv', sep='\t', index=False)
print("\n✅ Results saved as 'test_vs_predicted.tsv'")
Test vs Predicted Values (first 20 rows):
      Date  Test_Values  Predicted_Values
2021-02-05       1500.0       1185.419340
2021-02-11       1550.0       1132.433480
2021-02-17       1400.0       1149.700700
2021-02-24       1400.0       1136.296329
2021-03-02       1350.0       1149.400293
2021-03-06       1350.0       1139.591248
2021-03-16       1300.0       1139.089280
2021-03-20       1300.0       1137.811439
2021-03-27       1300.0       1148.414509
2021-04-02       1250.0       1142.703195
2021-04-17       1100.0       1147.486854
2021-04-22       1050.0       1143.880142
2021-05-04       1050.0       1151.454511
2021-05-06        900.0       1147.205248
2021-08-10       1100.0       1150.900093
2021-08-21       1050.0       1149.831182
2021-08-25       1050.0       1152.205769
2021-08-29       1100.0       1151.180134
2021-09-04       1100.0       1155.343164
2021-09-21        950.0       1154.786385

✅ Results saved as 'test_vs_predicted.tsv'

10. Plotting actual vs predicted values (No scaling, so use directly)¶

In [237]:
plt.figure(figsize=(15,7))
plt.plot(train_idx, train, label='Train', color='blue')
plt.plot(val_idx, val, label='Validation', color='green')
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Price (Rs./Quintal)')
plt.title('SARIMA Model Forecast: Train, Validation, Test and Predictions')
plt.legend()
plt.grid(True)
plt.show()
No description has been provided for this image
In [238]:
plt.figure(figsize=(15,7))
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
Out[238]:
[<matplotlib.lines.Line2D at 0x1dce41915e0>]
No description has been provided for this image

13. Diagnostic Plots¶

In [130]:
sarima_result.plot_diagnostics(figsize=(15, 12))
plt.show()
No description has been provided for this image

--- LSTM---¶

--- Import libraries for LSTM----¶

In [ ]:
 
In [140]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt

1. Load and Normalize Data¶

In [142]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [143]:
np.random.seed(0)
df[' Date'] = pd.to_datetime(df[' Date'])
df.set_index(' Date', inplace=True)
df = df.sort_index()
df.head()
Out[143]:
State Name District Name Market Name Variety Group Arrivals (Tonnes) Min Price (Rs./Quintal) Max Price (Rs./Quintal) Modal Price (Rs./Quintal)
Date
2010-06-08 Kerala Idukki Nedumkandam Green Medium Spices 13.0 1500 1700 1650
2010-06-13 Kerala Idukki Nedumkandam Green Medium Spices 11.0 1300 1750 1500
2010-06-20 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1400 1800 1600
2010-06-27 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1300 1800 1650
2010-07-11 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1400 1850 1600

1. Normalize data¶

In [145]:
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
In [146]:
scaler = MinMaxScaler()
price_scaled = scaler.fit_transform(df[['Price (Rs./kg)']])

2. Prepare supervised learning data (X and y)¶

In [148]:
def create_sequences(data, lookback):
    X, y = [], []
    for i in range(len(data) - lookback):
        X.append(data[i:i+lookback])
        y.append(data[i+lookback])
    return np.array(X), np.array(y)

lookback = 10  # You can adjust this
X, y = create_sequences(price_scaled, lookback)

3. Split into train, val, test¶

In [150]:
train_size = int(len(X) * 0.7)
val_size = int(len(X) * 0.15)

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]

4. Reshape for GRU [samples, timesteps, features]¶

In [152]:
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val   = X_val.reshape((X_val.shape[0],   X_val.shape[1],   1))
X_test  = X_test.reshape((X_test.shape[0],  X_test.shape[1],  1))

4. Build LSTM model¶

In [154]:
def build_lstm_model(hp):
    model = Sequential()
    model.add(LSTM(
        units=hp.Int('units', min_value=32, max_value=128, step=16),
        activation='tanh',
        input_shape=(X_train.shape[1], X_train.shape[2])
    ))
    model.add(Dense(1))
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
        ),
        loss='mse'
    )
    return model

5. Hypertuning with TimeSeriesSplit¶

In [156]:
tuner = kt.Hyperband(
    build_lstm_model,
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='lstm_tuner',
    project_name='price_forecasting'
)

early_stop = EarlyStopping(monitor='val_loss', patience=5)

tuner.search(X_train, y_train,
             epochs=50,
             validation_data=(X_val, y_val),
             callbacks=[early_stop],
             verbose=1)
Reloading Tuner from lstm_tuner\price_forecasting\tuner0.json

6. Train final model¶

In [158]:
best_hp = tuner.get_best_hyperparameters(1)[0]
model = build_lstm_model(best_hp)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    callbacks=[early_stop],
    verbose=1
)

model.summary()
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
Epoch 1/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 5s 48ms/step - loss: 0.0361 - val_loss: 0.0791
Epoch 2/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0054 - val_loss: 0.0836
Epoch 3/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0053 - val_loss: 0.0468
Epoch 4/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0041 - val_loss: 0.0261
Epoch 5/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0032 - val_loss: 0.0166
Epoch 6/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0040 - val_loss: 0.0152
Epoch 7/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0028 - val_loss: 0.0148
Epoch 8/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0028 - val_loss: 0.0168
Epoch 9/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0030 - val_loss: 0.0194
Epoch 10/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0024 - val_loss: 0.0139
Epoch 11/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0023 - val_loss: 0.0120
Epoch 12/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0027 - val_loss: 0.0169
Epoch 13/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0026 - val_loss: 0.0148
Epoch 14/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0030 - val_loss: 0.0133
Epoch 15/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0025 - val_loss: 0.0115
Epoch 16/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0026 - val_loss: 0.0123
Epoch 17/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0029 - val_loss: 0.0143
Epoch 18/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0039 - val_loss: 0.0116
Epoch 19/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0024 - val_loss: 0.0108
Epoch 20/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0028 - val_loss: 0.0118
Epoch 21/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0022 - val_loss: 0.0106
Epoch 22/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0029 - val_loss: 0.0132
Epoch 23/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0023 - val_loss: 0.0104
Epoch 24/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0025 - val_loss: 0.0123
Epoch 25/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0023 - val_loss: 0.0110
Epoch 26/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0022 - val_loss: 0.0118
Epoch 27/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0026 - val_loss: 0.0109
Epoch 28/50
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0028 - val_loss: 0.0113
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ lstm (LSTM)                     │ (None, 32)             │         4,352 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 1)              │            33 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 13,157 (51.40 KB)
 Trainable params: 4,385 (17.13 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 8,772 (34.27 KB)

7. Forecast¶

In [160]:
y_pred_scaled = model.predict(X_test)

# Inverse transform
train_inv = scaler.inverse_transform(y_train.reshape(-1, 1)).flatten()
val_inv = scaler.inverse_transform(y_val.reshape(-1, 1)).flatten()
test_inv = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_pred = scaler.inverse_transform(y_pred_scaled).flatten()
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 249ms/step
In [161]:
# 8. Create indices for plotting
train_idx = df.iloc[:len(y_train)].index
val_idx = df.iloc[len(y_train):len(y_train)+len(y_val)].index
test_idx = df.iloc[len(y_train)+len(y_val):len(y_train)+len(y_val)+len(y_test)].index

10. Evaluation¶

In [163]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def safe_mape(y_true, y_pred):
    mask = y_true != 0
    return (np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])).mean() * 100

def directional_accuracy(y_true, y_pred):
    true_direction = np.sign(np.diff(y_true))
    pred_direction = np.sign(np.diff(y_pred))
    return np.mean(true_direction == pred_direction) * 100

# Evaluation metrics
rmse = np.sqrt(mean_squared_error(test_inv, y_pred))
mae = mean_absolute_error(test_inv, y_pred)
mape = safe_mape(test_inv, y_pred)
r2 = r2_score(test_inv, y_pred)
da = directional_accuracy(test_inv, y_pred)

# Print results
print(f"✅ RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}, Directional Accuracy: {da:.2f}%")
✅ RMSE: 164.2725, MAE: 65.7785, MAPE: 4.94%, R²: 0.6425, Directional Accuracy: 25.00%
In [164]:
# Create a DataFrame with true and predicted values
results_df = pd.DataFrame({
    'True_Values': test_inv,
    'Predicted_Values': y_pred
})

# Save to TSV file
results_df.to_csv('true_vs_predicted.tsv', sep='\t', index=False)
print("\nSaved true vs predicted values to 'true_vs_predicted.tsv'")
Saved true vs predicted values to 'true_vs_predicted.tsv'

9. Plot¶

In [166]:
plt.figure(figsize=(15,7))
plt.plot(train_idx, train_inv, label='Train', color='blue')
plt.plot(val_idx, val_inv, label='Validation', color='green')
plt.plot(test_idx, test_inv, label='Test (Actual)', color='black')
plt.plot(test_idx, y_pred, label='Test (Predicted)', color='red', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Price (Rs./Quintal)')
plt.title('LSTM Model Forecast: Train, Validation, Test and Predictions')
plt.legend()
plt.grid(True)
plt.show()
No description has been provided for this image
In [167]:
plt.figure(figsize=(15,7))
plt.plot(test_idx, test_inv, label='Test (Actual)', color='black')
plt.plot(test_idx, y_pred, label='Test (Predicted)', color='red', linestyle='--')
Out[167]:
[<matplotlib.lines.Line2D at 0x2389ee2a7b0>]
No description has been provided for this image
In [168]:
# 11. Calculate residuals
residuals = test_inv - y_pred

# 12. Plot residuals
plt.figure(figsize=(15,7))
plt.plot(test_idx, residuals, label='Residuals', color='purple')
plt.axhline(0, color='black', linestyle='--')  # Zero line for reference
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.title('LSTM Model Residuals Plot')
plt.legend()
plt.grid(True)
plt.show()
No description has been provided for this image
In [13]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf

# Suppress warnings
warnings.filterwarnings("ignore")

# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

print(f"Original data length: {len(data)}")

# --- Step 1: Data Preparation for LSTM ---
# Use original data directly (no EMD decomposition)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.reshape(-1, 1))

# Create sequences for LSTM
def create_sequences(data, lookback=52):
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])
        y.append(data[i])
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)

# Reshape for LSTM [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 2: Hyperparameter Tuning for LSTM ---
def build_model(hp):
    model = Sequential()
    
    # Number of LSTM layers
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(LSTM(
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers with ReLU activation
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

print("\nStarting LSTM hyperparameter tuning...")
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=2,
    directory='lstm_tuning',
    project_name='cardamom_lstm'
)

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}")
    print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")

# --- Step 3: Build and Train Final LSTM Model ---
final_model = tuner.hypermodel.build(best_hp)

print("\nTraining final LSTM model...")
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 4: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()

# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Get actual values (original scale)
y_actual = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]

# --- Step 5: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    mape = mean_absolute_percentage_error(actual, forecast)
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
    
    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R²': r2,
        'Directional Accuracy': da
    }

# Evaluate on original data
metrics = evaluate_forecast(y_actual, y_pred)

print("\n" + "="*60)
print("LSTM MODEL TRAINING SUMMARY")
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print("\nLSTM Model Architecture:")
final_model.summary()

print("\n" + "="*60)
print("EVALUATION METRICS")
print("="*60)
for metric, value in metrics.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

# --- Step 6: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]

plt.figure(figsize=(18, 12))

# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('LSTM Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Full Data with Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', color='blue', alpha=0.7)
plt.plot(test_dates, y_pred, label='LSTM Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original Data vs LSTM Forecast')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates, 
                 y_pred - metrics['RMSE'], 
                 y_pred + metrics['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - LSTM Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result3.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 7: Residual Analysis ---
residuals = y_actual - y_pred



plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('LSTM Residuals Over Time')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result4.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('LSTM Residual Distribution')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result6.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result7.png", dpi=300, bbox_inches='tight')
plt.show()

print("\nLSTM Residual Analysis:")
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")

# --- Step 8: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
        forecasts.append(prediction)
        
        # Update sequence
        new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
        current_sequence = new_sequence
    
    # Inverse transform
    forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
    
    # Create future dates
    last_date = df.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST - LSTM MODEL (NEXT 12 WEEKS)")
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
        
    # Plot future forecast
    plt.figure(figsize=(12, 6))
    plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
    plt.plot(future_dates, future_prices, label='LSTM Future Forecast', color='red', linestyle='--', linewidth=2)
    plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
    plt.title('LSTM Future Price Forecast (Next 12 Weeks)')
    plt.xlabel('Date')
    plt.ylabel('Price (Rs./kg)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/lstm_result8.png", dpi=300, bbox_inches='tight')
    plt.show()
    
except Exception as e:
    print(f"Future forecasting failed: {e}")

# --- LSTM Benefits Summary ---
print("\n" + "="*60)
print("LSTM MODEL ADVANTAGES")
print("="*60)
print("1. Long-term Memory: Handles long-term dependencies effectively")
print("2. Sequence Learning: Excellent at learning temporal patterns")
print("3. Gate Mechanism: Input, forget, and output gates control information flow")
print("4. Vanishing Gradient Solution: Better than simple RNNs for long sequences")
print("5. Non-linear Modeling: Captures complex non-linear relationships")
print("6. Robustness: Handles noise and missing data well")
print("7. Proven Performance: Extensive successful applications in time series")
print("8. Flexibility: Can model various time series patterns and seasonalities")
print("9. Automatic Feature Learning: Learns relevant features from raw data")
print("10. Scalability: Can handle large datasets efficiently")

# --- Additional: Training vs Validation Performance Analysis ---
print("\n" + "="*60)
print("TRAINING PERFORMANCE ANALYSIS")
print("="*60)
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
final_train_mae = history.history['mae'][-1]
final_val_mae = history.history['val_mae'][-1]

print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")
print(f"Final Training MAE: {final_train_mae:.4f}")
print(f"Final Validation MAE: {final_val_mae:.4f}")

# Check for overfitting
if final_val_loss > final_train_loss * 1.1:
    print("Warning: Potential overfitting detected (validation loss significantly higher than training loss)")
else:
    print("Good: Model shows no signs of overfitting")
Original data length: 722
Training sequences: (468, 52, 1)
Validation sequences: (100, 52, 1)
Test sequences: (102, 52, 1)

Starting LSTM hyperparameter tuning...
Reloading Tuner from lstm_tuning\cardamom_lstm\tuner0.json

Best Hyperparameters:
Number of LSTM layers: 2
Learning rate: 0.0010810344243983956
LSTM layer 1 units: 160
LSTM layer 1 dropout: 0.5
LSTM layer 2 units: 32
LSTM layer 2 dropout: 0.30000000000000004

Training final LSTM model...
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 7s 92ms/step - loss: 0.0615 - mae: 0.1784 - val_loss: 0.0061 - val_mae: 0.0665
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0237 - mae: 0.0979 - val_loss: 0.0042 - val_mae: 0.0557
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0151 - mae: 0.0725 - val_loss: 0.0018 - val_mae: 0.0314
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0117 - mae: 0.0629 - val_loss: 0.0015 - val_mae: 0.0269
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0112 - mae: 0.0615 - val_loss: 0.0023 - val_mae: 0.0344
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0117 - mae: 0.0613 - val_loss: 0.0025 - val_mae: 0.0381
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0077 - mae: 0.0516 - val_loss: 0.0017 - val_mae: 0.0294
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0080 - mae: 0.0528 - val_loss: 0.0015 - val_mae: 0.0261
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0084 - mae: 0.0515 - val_loss: 0.0014 - val_mae: 0.0244
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0076 - mae: 0.0479 - val_loss: 0.0018 - val_mae: 0.0310
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - loss: 0.0118 - mae: 0.0612 - val_loss: 0.0023 - val_mae: 0.0381
Epoch 12/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0081 - mae: 0.0503 - val_loss: 0.0029 - val_mae: 0.0462
Epoch 13/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0113 - mae: 0.0575 - val_loss: 0.0013 - val_mae: 0.0234
Epoch 14/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0089 - mae: 0.0499 - val_loss: 0.0018 - val_mae: 0.0327
Epoch 15/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0103 - mae: 0.0624 - val_loss: 0.0041 - val_mae: 0.0556
Epoch 16/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0094 - mae: 0.0592 - val_loss: 0.0018 - val_mae: 0.0304
Epoch 17/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0064 - mae: 0.0448 - val_loss: 0.0015 - val_mae: 0.0260
Epoch 18/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0079 - mae: 0.0499 - val_loss: 0.0014 - val_mae: 0.0248
Epoch 19/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0073 - mae: 0.0461 - val_loss: 0.0013 - val_mae: 0.0230
Epoch 20/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0086 - mae: 0.0479 - val_loss: 0.0013 - val_mae: 0.0229
Epoch 21/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0089 - mae: 0.0491 - val_loss: 0.0016 - val_mae: 0.0269
Epoch 22/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0072 - mae: 0.0461 - val_loss: 0.0013 - val_mae: 0.0225
Epoch 23/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0108 - mae: 0.0562 - val_loss: 0.0015 - val_mae: 0.0271
Epoch 24/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0060 - mae: 0.0453 - val_loss: 0.0018 - val_mae: 0.0328
Epoch 25/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0125 - mae: 0.0581 - val_loss: 0.0017 - val_mae: 0.0262
Epoch 26/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0057 - mae: 0.0462 - val_loss: 0.0015 - val_mae: 0.0273
Epoch 27/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0067 - mae: 0.0459 - val_loss: 0.0012 - val_mae: 0.0221
Epoch 28/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0086 - mae: 0.0482 - val_loss: 0.0013 - val_mae: 0.0244
Epoch 29/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0069 - mae: 0.0450 - val_loss: 0.0052 - val_mae: 0.0629
Epoch 30/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0077 - mae: 0.0517 - val_loss: 0.0018 - val_mae: 0.0307
Epoch 31/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0082 - mae: 0.0526 - val_loss: 0.0014 - val_mae: 0.0280
Epoch 32/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0086 - mae: 0.0507 - val_loss: 0.0013 - val_mae: 0.0225
Epoch 33/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0091 - mae: 0.0497 - val_loss: 0.0025 - val_mae: 0.0396
Epoch 34/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0058 - mae: 0.0423 - val_loss: 0.0014 - val_mae: 0.0248
Epoch 35/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0103 - mae: 0.0537 - val_loss: 0.0014 - val_mae: 0.0263
Epoch 36/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0080 - mae: 0.0482 - val_loss: 0.0021 - val_mae: 0.0381
Epoch 37/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0081 - mae: 0.0498 - val_loss: 0.0014 - val_mae: 0.0280
Epoch 38/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0101 - mae: 0.0525 - val_loss: 0.0011 - val_mae: 0.0219
Epoch 39/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0080 - mae: 0.0462 - val_loss: 0.0014 - val_mae: 0.0251
Epoch 40/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0081 - mae: 0.0434 - val_loss: 0.0016 - val_mae: 0.0279
Epoch 41/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0067 - mae: 0.0433 - val_loss: 0.0016 - val_mae: 0.0285
Epoch 42/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0053 - mae: 0.0413 - val_loss: 0.0012 - val_mae: 0.0248
Epoch 43/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0059 - mae: 0.0442 - val_loss: 0.0012 - val_mae: 0.0221
Epoch 44/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0104 - mae: 0.0519 - val_loss: 0.0011 - val_mae: 0.0216
Epoch 45/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0053 - mae: 0.0436 - val_loss: 0.0013 - val_mae: 0.0256
Epoch 46/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0066 - mae: 0.0432 - val_loss: 0.0021 - val_mae: 0.0392
Epoch 47/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0072 - mae: 0.0449 - val_loss: 0.0011 - val_mae: 0.0209
Epoch 48/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0070 - mae: 0.0441 - val_loss: 0.0013 - val_mae: 0.0258
Epoch 49/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0064 - mae: 0.0469 - val_loss: 0.0032 - val_mae: 0.0495
Epoch 50/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0071 - mae: 0.0485 - val_loss: 0.0011 - val_mae: 0.0209
Epoch 51/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0062 - mae: 0.0413 - val_loss: 0.0011 - val_mae: 0.0207
Epoch 52/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0057 - mae: 0.0395 - val_loss: 0.0013 - val_mae: 0.0271
Epoch 53/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0062 - mae: 0.0429 - val_loss: 0.0010 - val_mae: 0.0200
Epoch 54/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0064 - mae: 0.0424 - val_loss: 9.9059e-04 - val_mae: 0.0199
Epoch 55/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0075 - mae: 0.0460 - val_loss: 0.0012 - val_mae: 0.0232
Epoch 56/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0061 - mae: 0.0415 - val_loss: 0.0026 - val_mae: 0.0424
Epoch 57/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0051 - mae: 0.0416 - val_loss: 0.0011 - val_mae: 0.0208
Epoch 58/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0049 - mae: 0.0398 - val_loss: 0.0023 - val_mae: 0.0370
Epoch 59/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0053 - mae: 0.0413 - val_loss: 0.0017 - val_mae: 0.0305
Epoch 60/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0074 - mae: 0.0465 - val_loss: 0.0010 - val_mae: 0.0208
Epoch 61/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0065 - mae: 0.0424 - val_loss: 0.0010 - val_mae: 0.0202
Epoch 62/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0067 - mae: 0.0424 - val_loss: 8.4845e-04 - val_mae: 0.0185
Epoch 63/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0060 - mae: 0.0449 - val_loss: 0.0021 - val_mae: 0.0397
Epoch 64/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0069 - mae: 0.0465 - val_loss: 0.0045 - val_mae: 0.0634
Epoch 65/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0081 - mae: 0.0554 - val_loss: 9.3334e-04 - val_mae: 0.0195
Epoch 66/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0062 - mae: 0.0403 - val_loss: 0.0012 - val_mae: 0.0248
Epoch 67/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0058 - mae: 0.0400 - val_loss: 0.0014 - val_mae: 0.0272
Epoch 68/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0057 - mae: 0.0421 - val_loss: 0.0011 - val_mae: 0.0251
Epoch 69/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0050 - mae: 0.0390 - val_loss: 9.0703e-04 - val_mae: 0.0193
Epoch 70/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0068 - mae: 0.0419 - val_loss: 0.0014 - val_mae: 0.0260
Epoch 71/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0060 - mae: 0.0418 - val_loss: 8.2340e-04 - val_mae: 0.0187
Epoch 72/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0046 - mae: 0.0390 - val_loss: 0.0012 - val_mae: 0.0259
Epoch 73/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0076 - mae: 0.0418 - val_loss: 0.0010 - val_mae: 0.0222
Epoch 74/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0062 - mae: 0.0391 - val_loss: 9.6502e-04 - val_mae: 0.0213
Epoch 75/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0060 - mae: 0.0395 - val_loss: 0.0014 - val_mae: 0.0275
Epoch 76/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0042 - mae: 0.0397 - val_loss: 0.0011 - val_mae: 0.0255
Epoch 77/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0055 - mae: 0.0408 - val_loss: 0.0013 - val_mae: 0.0292
Epoch 78/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0057 - mae: 0.0410 - val_loss: 0.0011 - val_mae: 0.0224
Epoch 79/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0053 - mae: 0.0399 - val_loss: 8.7375e-04 - val_mae: 0.0196
Epoch 80/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0060 - mae: 0.0382 - val_loss: 7.8255e-04 - val_mae: 0.0184
Epoch 81/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0056 - mae: 0.0393 - val_loss: 0.0010 - val_mae: 0.0242
Epoch 82/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0051 - mae: 0.0391 - val_loss: 9.3849e-04 - val_mae: 0.0222
Epoch 83/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0061 - mae: 0.0398 - val_loss: 0.0011 - val_mae: 0.0227
Epoch 84/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 53ms/step - loss: 0.0067 - mae: 0.0411 - val_loss: 8.2395e-04 - val_mae: 0.0200
Epoch 85/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0049 - mae: 0.0373 - val_loss: 0.0012 - val_mae: 0.0281
Epoch 86/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0075 - mae: 0.0428 - val_loss: 0.0011 - val_mae: 0.0252
Epoch 87/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0054 - mae: 0.0389 - val_loss: 0.0011 - val_mae: 0.0253
Epoch 88/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0057 - mae: 0.0391 - val_loss: 8.6970e-04 - val_mae: 0.0209
Epoch 89/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0068 - mae: 0.0433 - val_loss: 0.0011 - val_mae: 0.0227
Epoch 90/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0075 - mae: 0.0436 - val_loss: 8.6688e-04 - val_mae: 0.0194
Epoch 91/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0055 - mae: 0.0428 - val_loss: 0.0011 - val_mae: 0.0268
Epoch 92/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0038 - mae: 0.0323 - val_loss: 9.2764e-04 - val_mae: 0.0226
Epoch 93/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 0.0053 - mae: 0.0404 - val_loss: 0.0015 - val_mae: 0.0313
Epoch 94/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0037 - mae: 0.0353 - val_loss: 0.0014 - val_mae: 0.0307
Epoch 95/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0047 - mae: 0.0376 - val_loss: 0.0021 - val_mae: 0.0383
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 160ms/step

============================================================
LSTM MODEL TRAINING SUMMARY
============================================================
Final epochs trained: 95
Best validation loss: 0.0008
Best validation MAE: 0.0184
Lookback period: 52 weeks

LSTM Model Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ lstm (LSTM)                          │ (None, 52, 160)             │         103,680 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout (Dropout)                    │ (None, 52, 160)             │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ lstm_1 (LSTM)                        │ (None, 32)                  │          24,704 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_1 (Dropout)                  │ (None, 32)                  │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 112)                 │           3,696 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_2 (Dropout)                  │ (None, 112)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ (None, 1)                   │             113 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 396,581 (1.51 MB)
 Trainable params: 132,193 (516.38 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 264,388 (1.01 MB)
============================================================
EVALUATION METRICS
============================================================
MSE: 62558.7593
RMSE: 250.1175
MAE: 157.0250
MAPE: 0.09%
R²: 0.6514
Directional Accuracy: 17.82%
<Figure size 1800x1200 with 0 Axes>
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
LSTM Residual Analysis:
Residual mean: 64.5514
Residual std: 241.6441
Residual min: -481.2880
Residual max: 1415.6563

==================================================
FUTURE FORECAST - LSTM MODEL (NEXT 12 WEEKS)
==================================================
2024-11-03: 2111.44
2024-11-10: 2051.00
2024-11-17: 1987.99
2024-11-24: 1940.50
2024-12-01: 1911.20
2024-12-08: 1897.13
2024-12-15: 1893.15
2024-12-22: 1893.74
2024-12-29: 1894.32
2025-01-05: 1891.50
2025-01-12: 1883.74
2025-01-19: 1870.96
No description has been provided for this image
============================================================
LSTM MODEL ADVANTAGES
============================================================
1. Long-term Memory: Handles long-term dependencies effectively
2. Sequence Learning: Excellent at learning temporal patterns
3. Gate Mechanism: Input, forget, and output gates control information flow
4. Vanishing Gradient Solution: Better than simple RNNs for long sequences
5. Non-linear Modeling: Captures complex non-linear relationships
6. Robustness: Handles noise and missing data well
7. Proven Performance: Extensive successful applications in time series
8. Flexibility: Can model various time series patterns and seasonalities
9. Automatic Feature Learning: Learns relevant features from raw data
10. Scalability: Can handle large datasets efficiently

============================================================
TRAINING PERFORMANCE ANALYSIS
============================================================
Final Training Loss: 0.0049
Final Validation Loss: 0.0021
Final Training MAE: 0.0383
Final Validation MAE: 0.0383
Good: Model shows no signs of overfitting
In [9]:
# --- Step 9: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("GRU MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)

# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")

# Model architecture details
for i, layer in enumerate(final_model.layers):
    print(f"\nLayer {i+1}: {layer.__class__.__name__}")
    try:
        print(f"  Units: {layer.units}")
    except:
        pass
    try:
        print(f"  Activation: {layer.activation.__name__}")
    except:
        pass
    try:
        print(f"  Dropout Rate: {layer.rate}")
    except:
        pass
    if hasattr(layer, "return_sequences"):
        print(f"  Return Sequences: {layer.return_sequences}")

# Training summary
print("\nTraining Details:")
print(f"Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")


print("\n" + "="*60)
print("NOTE: The above configuration includes optimizer, activation functions, "
      "learning rate, and automatic layer details for full reproducibility.")
print("="*60)
============================================================
GRU MODEL CONFIGURATION & TRAINING DETAILS
============================================================
Optimizer: Adam
Learning Rate: 0.001081034424714744

Layer 1: LSTM
  Units: 160
  Activation: tanh
  Return Sequences: True

Layer 2: Dropout
  Dropout Rate: 0.5

Layer 3: LSTM
  Units: 32
  Activation: tanh
  Return Sequences: False

Layer 4: Dropout
  Dropout Rate: 0.30000000000000004

Layer 5: Dense
  Units: 112
  Activation: relu

Layer 6: Dropout
  Dropout Rate: 0.30000000000000004

Layer 7: Dense
  Units: 1
  Activation: linear

Training Details:
Epochs Trained: 47
Final Training Loss: 0.0063
Final Validation Loss: 0.0020
Final Training MAE: 0.0422
Final Validation MAE: 0.0325

============================================================
NOTE: The above configuration includes optimizer, activation functions, learning rate, and automatic layer details for full reproducibility.
============================================================
In [3]:
plt.figure(figsize=(18, 12))


# Plot 3: Separate View - Actual vs Predicted

plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)

plt.title('Actual vs Predicted - LSTM Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
 

--- GRU ---¶

--- Import libraries for GRU ----¶

In [338]:
!pip install keras-tuner
Requirement already satisfied: keras-tuner in c:\users\marti\anaconda3\lib\site-packages (1.4.7)
Requirement already satisfied: keras in c:\users\marti\anaconda3\lib\site-packages (from keras-tuner) (3.5.0)
Requirement already satisfied: packaging in c:\users\marti\appdata\roaming\python\python312\site-packages (from keras-tuner) (24.1)
Requirement already satisfied: requests in c:\users\marti\anaconda3\lib\site-packages (from keras-tuner) (2.32.2)
Requirement already satisfied: kt-legacy in c:\users\marti\anaconda3\lib\site-packages (from keras-tuner) (1.0.5)
Requirement already satisfied: absl-py in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (2.1.0)
Requirement already satisfied: numpy in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (1.26.4)
Requirement already satisfied: rich in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (13.3.5)
Requirement already satisfied: namex in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (0.0.8)
Requirement already satisfied: h5py in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (3.11.0)
Requirement already satisfied: optree in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (0.12.1)
Requirement already satisfied: ml-dtypes in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (0.4.0)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (2.2.2)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (2024.7.4)
Requirement already satisfied: typing-extensions>=4.5.0 in c:\users\marti\anaconda3\lib\site-packages (from optree->keras->keras-tuner) (4.11.0)
Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in c:\users\marti\anaconda3\lib\site-packages (from rich->keras->keras-tuner) (2.2.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\marti\appdata\roaming\python\python312\site-packages (from rich->keras->keras-tuner) (2.18.0)
Requirement already satisfied: mdurl~=0.1 in c:\users\marti\anaconda3\lib\site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->keras->keras-tuner) (0.1.0)
Could not fetch URL https://pypi.org/simple/pip/: There was a problem confirming the ssl certificate: HTTPSConnectionPool(host='pypi.org', port=443): Max retries exceeded with url: /simple/pip/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)'))) - skipping
In [15]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout  # Changed LSTM to GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf

# Suppress warnings
warnings.filterwarnings("ignore")

# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)

df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

print(f"Original data length: {len(data)}")

# --- Step 1: Data Preparation for GRU ---
# Use original data directly
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.reshape(-1, 1))

# Create sequences for GRU
def create_sequences(data, lookback=52):
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])
        y.append(data[i])
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)

# Reshape for GRU [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 2: Hyperparameter Tuning for GRU ---
def build_model(hp):
    model = Sequential()
    
    # Number of GRU layers (changed from LSTM to GRU)
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(GRU(  # Changed LSTM to GRU
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers with ReLU activation
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

print("\nStarting GRU hyperparameter tuning...")
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=2,
    directory='gru_tuning',  # Changed directory name
    project_name='cardamom_gru'  # Changed project name
)

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}")  # Changed from LSTM to GRU
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}")  # Changed from LSTM to GRU
    print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")  # Changed from LSTM to GRU

# --- Step 3: Build and Train Final GRU Model ---
final_model = tuner.hypermodel.build(best_hp)

print("\nTraining final GRU model...")  # Changed to GRU
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 4: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()

# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Get actual values (original scale)
y_actual = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]

# --- Step 5: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    mape = mean_absolute_percentage_error(actual, forecast)
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
    
    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R²': r2,
        'Directional Accuracy': da
    }

# Evaluate on original data
metrics = evaluate_forecast(y_actual, y_pred)

print("\n" + "="*60)
print("GRU MODEL TRAINING SUMMARY")  # Changed to GRU
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print("\nGRU Model Architecture:")  # Changed to GRU
final_model.summary()

print("\n" + "="*60)
print("EVALUATION METRICS")
print("="*60)
for metric, value in metrics.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

# --- Step 6: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]



# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('GRU Training History')  # Changed to GRU
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result8.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Full Data with Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', color='blue', alpha=0.7)
plt.plot(test_dates, y_pred, label='GRU Forecast', color='red', linestyle='--', linewidth=2)  # Changed to GRU
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original Data vs GRU Forecast')  # Changed to GRU
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result7.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)  # Added GRU
plt.fill_between(test_dates, 
                 y_pred - metrics['RMSE'], 
                 y_pred + metrics['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - GRU Model (Test Period)')  # Changed to GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result6.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 7: Residual Analysis ---
residuals = y_actual - y_pred



plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('GRU Residuals Over Time')  # Changed to GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('GRU Residual Distribution')  # Changed to GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result4.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result3.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result2.png", dpi=300, bbox_inches='tight')
plt.show()

print("\nGRU Residual Analysis:")  # Changed to GRU
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")

# --- Step 8: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
        forecasts.append(prediction)
        
        # Update sequence
        new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
        current_sequence = new_sequence
    
    # Inverse transform
    forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
    
    # Create future dates
    last_date = df.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS)")  # Changed to GRU
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
        
    # Plot future forecast
    plt.figure(figsize=(12, 6))
    plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
    plt.plot(future_dates, future_prices, label='GRU Future Forecast', color='red', linestyle='--', linewidth=2)  # Changed to GRU
    plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
    plt.title('GRU Future Price Forecast (Next 12 Weeks)')  # Changed to GRU
    plt.xlabel('Date')
    plt.ylabel('Price (Rs./kg)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/gru_result1.png", dpi=300, bbox_inches='tight')
    plt.show()
    
except Exception as e:
    print(f"Future forecasting failed: {e}")

# --- GRU Benefits Summary ---
print("\n" + "="*60)
print("GRU MODEL ADVANTAGES OVER LSTM")
print("="*60)
print("1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates)")
print("2. Faster Training: Less complex architecture leads to faster training times")
print("3. Better Performance: Often performs better on smaller datasets")
print("4. Reduced Overfitting: Simpler architecture can be less prone to overfitting")
print("5. Memory Efficiency: Uses less memory during training and inference")
print("6. Faster Convergence: Typically converges faster than LSTM")
print("7. Better Gradient Flow: Simpler architecture improves gradient propagation")
print("8. Simpler Architecture: Easier to train and tune")
print("9. Comparable Performance: Often achieves similar results to LSTM with less complexity")
print("10. Efficient Resource Usage: Better for resource-constrained environments")

# --- Additional: Training vs Validation Performance Analysis ---
print("\n" + "="*60)
print("TRAINING PERFORMANCE ANALYSIS")
print("="*60)
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
final_train_mae = history.history['mae'][-1]
final_val_mae = history.history['val_mae'][-1]

print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")
print(f"Final Training MAE: {final_train_mae:.4f}")
print(f"Final Validation MAE: {final_val_mae:.4f}")

# Check for overfitting
if final_val_loss > final_train_loss * 1.1:
    print("Warning: Potential overfitting detected (validation loss significantly higher than training loss)")
else:
    print("Good: Model shows no signs of overfitting")

# --- GRU vs LSTM Comparison ---
print("\n" + "="*60)
print("GRU vs LSTM COMPARISON")
print("="*60)
print("GRU Advantages:")
print("- 30% fewer parameters than equivalent LSTM")
print("- 20-30% faster training time")
print("- Simpler architecture with 2 gates (update and reset)")
print("- Better for smaller datasets")
print("- Less prone to overfitting")

print("\nLSTM Advantages:")
print("- More expressive power with 3 gates")
print("- Better for very long sequences")
print("- More established in research literature")
print("- Slightly better on some complex tasks")

print("\nRecommendation: GRU is often preferred for its efficiency and comparable performance!")
Original data length: 722
Training sequences: (468, 52, 1)
Validation sequences: (100, 52, 1)
Test sequences: (102, 52, 1)

Starting GRU hyperparameter tuning...
Reloading Tuner from gru_tuning\cardamom_gru\tuner0.json

Best Hyperparameters:
Number of GRU layers: 2
Learning rate: 0.0003473714958642173
GRU layer 1 units: 256
GRU layer 1 dropout: 0.30000000000000004
GRU layer 2 units: 224
GRU layer 2 dropout: 0.1

Training final GRU model...
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 9s 157ms/step - loss: 0.0371 - mae: 0.1428 - val_loss: 0.0026 - val_mae: 0.0411
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 108ms/step - loss: 0.0090 - mae: 0.0510 - val_loss: 0.0045 - val_mae: 0.0612
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 108ms/step - loss: 0.0075 - mae: 0.0485 - val_loss: 9.5461e-04 - val_mae: 0.0200
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0072 - mae: 0.0397 - val_loss: 9.8309e-04 - val_mae: 0.0212
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0057 - mae: 0.0347 - val_loss: 0.0012 - val_mae: 0.0244
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0062 - mae: 0.0354 - val_loss: 0.0013 - val_mae: 0.0275
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0054 - mae: 0.0422 - val_loss: 8.8386e-04 - val_mae: 0.0196
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0058 - mae: 0.0344 - val_loss: 8.4164e-04 - val_mae: 0.0180
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0051 - mae: 0.0301 - val_loss: 8.5880e-04 - val_mae: 0.0188
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0050 - mae: 0.0312 - val_loss: 8.7369e-04 - val_mae: 0.0203
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0054 - mae: 0.0322 - val_loss: 9.8875e-04 - val_mae: 0.0224
Epoch 12/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0037 - mae: 0.0262 - val_loss: 7.9651e-04 - val_mae: 0.0175
Epoch 13/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 113ms/step - loss: 0.0053 - mae: 0.0309 - val_loss: 0.0015 - val_mae: 0.0308
Epoch 14/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0058 - mae: 0.0369 - val_loss: 7.7190e-04 - val_mae: 0.0186
Epoch 15/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0047 - mae: 0.0272 - val_loss: 0.0011 - val_mae: 0.0252
Epoch 16/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0049 - mae: 0.0313 - val_loss: 9.4658e-04 - val_mae: 0.0233
Epoch 17/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0044 - mae: 0.0299 - val_loss: 0.0012 - val_mae: 0.0271
Epoch 18/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0037 - mae: 0.0295 - val_loss: 0.0010 - val_mae: 0.0239
Epoch 19/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0050 - mae: 0.0347 - val_loss: 6.8192e-04 - val_mae: 0.0173
Epoch 20/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0044 - mae: 0.0294 - val_loss: 6.2613e-04 - val_mae: 0.0149
Epoch 21/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0032 - mae: 0.0278 - val_loss: 7.3258e-04 - val_mae: 0.0192
Epoch 22/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0037 - mae: 0.0283 - val_loss: 8.5199e-04 - val_mae: 0.0210
Epoch 23/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0042 - mae: 0.0280 - val_loss: 0.0011 - val_mae: 0.0268
Epoch 24/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 112ms/step - loss: 0.0035 - mae: 0.0309 - val_loss: 6.3899e-04 - val_mae: 0.0158
Epoch 25/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0031 - mae: 0.0243 - val_loss: 5.8518e-04 - val_mae: 0.0140
Epoch 26/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0047 - mae: 0.0321 - val_loss: 5.7091e-04 - val_mae: 0.0145
Epoch 27/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0039 - mae: 0.0278 - val_loss: 0.0021 - val_mae: 0.0407
Epoch 28/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0048 - mae: 0.0341 - val_loss: 7.1002e-04 - val_mae: 0.0181
Epoch 29/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0028 - mae: 0.0244 - val_loss: 5.6439e-04 - val_mae: 0.0140
Epoch 30/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0034 - mae: 0.0247 - val_loss: 6.8003e-04 - val_mae: 0.0176
Epoch 31/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0032 - mae: 0.0248 - val_loss: 5.4630e-04 - val_mae: 0.0135
Epoch 32/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0039 - mae: 0.0283 - val_loss: 7.4940e-04 - val_mae: 0.0194
Epoch 33/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 111ms/step - loss: 0.0051 - mae: 0.0310 - val_loss: 5.7265e-04 - val_mae: 0.0143
Epoch 34/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0041 - mae: 0.0278 - val_loss: 6.4447e-04 - val_mae: 0.0178
Epoch 35/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0043 - mae: 0.0283 - val_loss: 5.3495e-04 - val_mae: 0.0136
Epoch 36/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0043 - mae: 0.0295 - val_loss: 5.6560e-04 - val_mae: 0.0156
Epoch 37/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0042 - mae: 0.0266 - val_loss: 9.6885e-04 - val_mae: 0.0253
Epoch 38/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0028 - mae: 0.0273 - val_loss: 7.1013e-04 - val_mae: 0.0197
Epoch 39/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0051 - mae: 0.0319 - val_loss: 6.3080e-04 - val_mae: 0.0165
Epoch 40/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0040 - mae: 0.0267 - val_loss: 0.0011 - val_mae: 0.0262
Epoch 41/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0039 - mae: 0.0290 - val_loss: 0.0015 - val_mae: 0.0325
Epoch 42/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0051 - mae: 0.0332 - val_loss: 0.0010 - val_mae: 0.0251
Epoch 43/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0044 - mae: 0.0285 - val_loss: 0.0013 - val_mae: 0.0304
Epoch 44/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0037 - mae: 0.0339 - val_loss: 5.3816e-04 - val_mae: 0.0138
Epoch 45/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0047 - mae: 0.0324 - val_loss: 5.4324e-04 - val_mae: 0.0139
Epoch 46/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 112ms/step - loss: 0.0036 - mae: 0.0241 - val_loss: 7.5730e-04 - val_mae: 0.0195
Epoch 47/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0027 - mae: 0.0236 - val_loss: 0.0015 - val_mae: 0.0326
Epoch 48/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0038 - mae: 0.0335 - val_loss: 7.2088e-04 - val_mae: 0.0189
Epoch 49/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0043 - mae: 0.0320 - val_loss: 5.6190e-04 - val_mae: 0.0157
Epoch 50/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0041 - mae: 0.0292 - val_loss: 5.2849e-04 - val_mae: 0.0145
Epoch 51/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0044 - mae: 0.0281 - val_loss: 5.3974e-04 - val_mae: 0.0137
Epoch 52/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0036 - mae: 0.0259 - val_loss: 8.9868e-04 - val_mae: 0.0232
Epoch 53/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0048 - mae: 0.0288 - val_loss: 8.2981e-04 - val_mae: 0.0217
Epoch 54/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0050 - mae: 0.0299 - val_loss: 5.3542e-04 - val_mae: 0.0138
Epoch 55/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0035 - mae: 0.0268 - val_loss: 7.8986e-04 - val_mae: 0.0209
Epoch 56/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 111ms/step - loss: 0.0039 - mae: 0.0271 - val_loss: 0.0011 - val_mae: 0.0276
Epoch 57/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0047 - mae: 0.0320 - val_loss: 0.0015 - val_mae: 0.0337
Epoch 58/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0044 - mae: 0.0325 - val_loss: 5.9702e-04 - val_mae: 0.0157
Epoch 59/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0032 - mae: 0.0280 - val_loss: 6.9913e-04 - val_mae: 0.0197
Epoch 60/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0029 - mae: 0.0248 - val_loss: 5.0660e-04 - val_mae: 0.0137
Epoch 61/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0026 - mae: 0.0243 - val_loss: 6.7132e-04 - val_mae: 0.0181
Epoch 62/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0028 - mae: 0.0250 - val_loss: 8.6966e-04 - val_mae: 0.0236
Epoch 63/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0032 - mae: 0.0278 - val_loss: 8.1659e-04 - val_mae: 0.0211
Epoch 64/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0032 - mae: 0.0271 - val_loss: 0.0026 - val_mae: 0.0463
Epoch 65/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 113ms/step - loss: 0.0047 - mae: 0.0377 - val_loss: 5.5698e-04 - val_mae: 0.0143
Epoch 66/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0049 - mae: 0.0259 - val_loss: 5.4018e-04 - val_mae: 0.0143
Epoch 67/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0036 - mae: 0.0253 - val_loss: 8.1784e-04 - val_mae: 0.0211
Epoch 68/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0035 - mae: 0.0255 - val_loss: 8.8462e-04 - val_mae: 0.0225
Epoch 69/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 114ms/step - loss: 0.0037 - mae: 0.0274 - val_loss: 6.7853e-04 - val_mae: 0.0181
Epoch 70/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0034 - mae: 0.0297 - val_loss: 5.0852e-04 - val_mae: 0.0140
Epoch 71/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0030 - mae: 0.0248 - val_loss: 5.7793e-04 - val_mae: 0.0164
Epoch 72/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0041 - mae: 0.0287 - val_loss: 5.1131e-04 - val_mae: 0.0134
Epoch 73/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0039 - mae: 0.0247 - val_loss: 9.4247e-04 - val_mae: 0.0235
Epoch 74/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0036 - mae: 0.0258 - val_loss: 7.4028e-04 - val_mae: 0.0193
Epoch 75/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0026 - mae: 0.0224 - val_loss: 6.8610e-04 - val_mae: 0.0184
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 219ms/step

============================================================
GRU MODEL TRAINING SUMMARY
============================================================
Final epochs trained: 75
Best validation loss: 0.0005
Best validation MAE: 0.0134
Lookback period: 52 weeks

GRU Model Architecture:
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ gru (GRU)                            │ (None, 52, 256)             │         198,912 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_3 (Dropout)                  │ (None, 52, 256)             │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ gru_1 (GRU)                          │ (None, 224)                 │         323,904 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_4 (Dropout)                  │ (None, 224)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_2 (Dense)                      │ (None, 1)                   │             225 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 1,569,125 (5.99 MB)
 Trainable params: 523,041 (2.00 MB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 1,046,084 (3.99 MB)
============================================================
EVALUATION METRICS
============================================================
MSE: 46948.7033
RMSE: 216.6765
MAE: 125.2471
MAPE: 0.07%
R²: 0.7384
Directional Accuracy: 18.81%
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
GRU Residual Analysis:
Residual mean: 45.2228
Residual std: 211.9047
Residual min: -730.7085
Residual max: 1306.5748

==================================================
FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS)
==================================================
2024-11-03: 2235.58
2024-11-10: 2187.44
2024-11-17: 2146.88
2024-11-24: 2117.85
2024-12-01: 2099.04
2024-12-08: 2087.23
2024-12-15: 2079.03
2024-12-22: 2071.83
2024-12-29: 2064.06
2025-01-05: 2055.10
2025-01-12: 2044.94
2025-01-19: 2033.91
No description has been provided for this image
============================================================
GRU MODEL ADVANTAGES OVER LSTM
============================================================
1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates)
2. Faster Training: Less complex architecture leads to faster training times
3. Better Performance: Often performs better on smaller datasets
4. Reduced Overfitting: Simpler architecture can be less prone to overfitting
5. Memory Efficiency: Uses less memory during training and inference
6. Faster Convergence: Typically converges faster than LSTM
7. Better Gradient Flow: Simpler architecture improves gradient propagation
8. Simpler Architecture: Easier to train and tune
9. Comparable Performance: Often achieves similar results to LSTM with less complexity
10. Efficient Resource Usage: Better for resource-constrained environments

============================================================
TRAINING PERFORMANCE ANALYSIS
============================================================
Final Training Loss: 0.0041
Final Validation Loss: 0.0007
Final Training MAE: 0.0255
Final Validation MAE: 0.0184
Good: Model shows no signs of overfitting

============================================================
GRU vs LSTM COMPARISON
============================================================
GRU Advantages:
- 30% fewer parameters than equivalent LSTM
- 20-30% faster training time
- Simpler architecture with 2 gates (update and reset)
- Better for smaller datasets
- Less prone to overfitting

LSTM Advantages:
- More expressive power with 3 gates
- Better for very long sequences
- More established in research literature
- Slightly better on some complex tasks

Recommendation: GRU is often preferred for its efficiency and comparable performance!
In [12]:
# --- Step 9: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("GRU MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)

# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")

# Model architecture details
for i, layer in enumerate(final_model.layers):
    print(f"\nLayer {i+1}: {layer.__class__.__name__}")
    if hasattr(layer, "units"):
        print(f"  Units: {layer.units}")
    if hasattr(layer, "activation"):
        print(f"  Activation: {layer.activation.__name__}")
    if hasattr(layer, "rate"):
        print(f"  Dropout Rate: {layer.rate}")
    if hasattr(layer, "return_sequences"):
        print(f"  Return Sequences: {layer.return_sequences}")

# Training summary
print("\nTraining Summary:")
print(f"Total Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")

print("\n" + "="*60)
print("NOTE: This section auto-captures optimizer, learning rate, "
      "activation functions, and all layer details for reproducibility.")
print("="*60)
============================================================
GRU MODEL CONFIGURATION & TRAINING DETAILS
============================================================
Optimizer: Adam
Learning Rate: 0.00034737150417640805

Layer 1: GRU
  Units: 256
  Activation: tanh
  Return Sequences: True

Layer 2: Dropout
  Dropout Rate: 0.30000000000000004

Layer 3: GRU
  Units: 224
  Activation: tanh
  Return Sequences: False

Layer 4: Dropout
  Dropout Rate: 0.1

Layer 5: Dense
  Units: 1
  Activation: linear

Training Summary:
Total Epochs Trained: 62
Final Training Loss: 0.0043
Final Validation Loss: 0.0010
Final Training MAE: 0.0290
Final Validation MAE: 0.0259

============================================================
NOTE: This section auto-captures optimizer, learning rate, activation functions, and all layer details for reproducibility.
============================================================
In [13]:
plt.figure(figsize=(18, 12))



# Plot 3: Separate View - Actual vs Predicted

plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)  # Added GRU

plt.title('Actual vs Predicted - GRU Model (Test Period)')  # Changed to GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()
No description has been provided for this image

--- Fourier + ARIMA ---¶

--- Import libraries for Fourier + ARIMA and SARIMA----¶

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from pmdarima import auto_arima
from scipy.fft import fft, fftfreq
import warnings
warnings.filterwarnings('ignore')

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Convert price to Rs./kg
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
prices = df['Price (Rs./kg)'].dropna()

print(f"Using original data length: {len(prices)}")

# --- Fourier Feature Engineering ---
def generate_fourier_features(series, n_harmonics=5, period=52):
    """Generate Fourier features for seasonal patterns"""
    n = len(series)
    t = np.arange(n)
    
    fourier_features = pd.DataFrame(index=series.index)
    
    for k in range(1, n_harmonics + 1):
        fourier_features[f'cos_{k}'] = np.cos(2 * np.pi * k * t / period)
        fourier_features[f'sin_{k}'] = np.sin(2 * np.pi * k * t / period)
    
    return fourier_features

# Generate Fourier features
fourier_features = generate_fourier_features(prices, n_harmonics=5, period=52)

# --- Step 1: Train/Test Split ---
split1 = int(0.7 * len(prices))
split2 = int(0.85 * len(prices))
train, val, test = prices.iloc[:split1], prices.iloc[split1:split2], prices.iloc[split2:]

# Split Fourier features accordingly
fourier_train = fourier_features.iloc[:split1]
fourier_val = fourier_features.iloc[split1:split2]
fourier_test = fourier_features.iloc[split2:]

# Combine train and validation for final training
full_train = pd.concat([train, val])
full_fourier = pd.concat([fourier_train, fourier_val])

# --- Option 1: Fourier ARIMA ---
print("\n" + "="*50)
print("FOURIER ARIMA MODEL")
print("="*50)

# Auto ARIMA for optimal order (non-seasonal since Fourier handles seasonality)
auto_model_arima = auto_arima(
    full_train,
    seasonal=False,  # No seasonal component - Fourier handles seasonality
    stepwise=True,
    suppress_warnings=True,
    error_action='ignore',
    trace=True,
    max_order=10,
    information_criterion='aic',
    test='adf'
)

print(f"Optimal ARIMA order: {auto_model_arima.order}")

# Build Fourier ARIMA model
fourier_arima_model = ARIMA(
    full_train,
    order=auto_model_arima.order,
    exog=full_fourier
)

fourier_arima_result = fourier_arima_model.fit()

print("\nFOURIER ARIMA MODEL SUMMARY")
print("="*50)
print(fourier_arima_result.summary())

# Fourier ARIMA forecasting
fourier_arima_forecast = fourier_arima_result.get_forecast(
    steps=len(test), 
    exog=fourier_test
)
fourier_arima_mean = fourier_arima_forecast.predicted_mean
fourier_arima_conf_int = fourier_arima_forecast.conf_int()

# --- Option 2: Fourier SARIMA ---
print("\n" + "="*50)
print("FOURIER SARIMA MODEL")
print("="*50)

# Auto SARIMA for optimal order (with reduced seasonal component since Fourier helps)
auto_model_sarima = auto_arima(
    full_train,
    seasonal=True,
    m=26,  # Reduced seasonal period since Fourier helps
    stepwise=True,
    suppress_warnings=True,
    error_action='ignore',
    trace=True,
    max_order=10,
    information_criterion='aic',
    test='adf'
)

print(f"Optimal SARIMA order: {auto_model_sarima.order}")
print(f"Optimal Seasonal order: {auto_model_sarima.seasonal_order}")

# Build Fourier SARIMA model
fourier_sarima_model = SARIMAX(
    endog=full_train,
    exog=full_fourier,
    order=auto_model_sarima.order,
    seasonal_order=auto_model_sarima.seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False
)

fourier_sarima_result = fourier_sarima_model.fit(disp=False)

print("\nFOURIER SARIMA MODEL SUMMARY")
print("="*50)
print(fourier_sarima_result.summary())

# Fourier SARIMA forecasting
fourier_sarima_forecast = fourier_sarima_result.get_forecast(
    steps=len(test), 
    exog=fourier_test
)
fourier_sarima_mean = fourier_sarima_forecast.predicted_mean
fourier_sarima_conf_int = fourier_sarima_forecast.conf_int()

# --- Evaluation Function ---
def evaluate_forecast(actual, forecast, model_name):
    """Comprehensive forecast evaluation"""
    actual = np.asarray(actual)
    forecast = np.asarray(forecast)
    
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    
    with np.errstate(divide='ignore', invalid='ignore'):
        ape = np.where(actual != 0, np.abs(actual - forecast) / actual, np.nan)
        mape = np.nanmean(ape) * 100
    
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = np.mean(actual_diff == forecast_diff) * 100
    
    metrics = {
        'MSE': f"{mse:.4f}",
        'RMSE': f"{rmse:.4f}",
        'MAE': f"{mae:.4f}",
        'MAPE': f"{mape:.2f}%" if not np.isnan(mape) else "N/A",
        'R²': f"{r2:.4f}",
        'Directional Accuracy': f"{da:.2f}%"
    }
    
    print(f"\n{model_name} EVALUATION METRICS")
    print("="*50)
    for metric, value in metrics.items():
        print(f"{metric}: {value}")
    
    return metrics

# Evaluate both models
fourier_arima_metrics = evaluate_forecast(test.values, fourier_arima_mean.values, "FOURIER ARIMA")
fourier_sarima_metrics = evaluate_forecast(test.values, fourier_sarima_mean.values, "FOURIER SARIMA")

plt.figure(figsize=(12, 6))
fourier_features.iloc[:, :4].plot(ax=plt.gca())  # Show first 4 Fourier components
plt.title('Fourier Features (First 4 Components)')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original Data
plt.figure(figsize=(12, 6))
plt.plot(prices.index, prices, label='Original Data', color='blue', linewidth=2)
plt.title('Original Cardamom Price Data')
plt.ylabel('Price (Rs./kg)')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Fourier ARIMA Results
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:len(full_train)], full_train, label='Train+Val', color='blue', alpha=0.7)
plt.plot(test.index, test, label='Actual Test', color='green', linewidth=2)
plt.plot(test.index, fourier_arima_mean, label='Fourier ARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.fill_between(test.index, fourier_arima_conf_int.iloc[:, 0], fourier_arima_conf_int.iloc[:, 1], 
                 color='pink', alpha=0.3, label='95% CI')
plt.axvline(test.index[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Fourier ARIMA: Order {auto_model_arima.order}')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 4: Fourier SARIMA Results
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:len(full_train)], full_train, label='Train+Val', color='blue', alpha=0.7)
plt.plot(test.index, test, label='Actual Test', color='green', linewidth=2)
plt.plot(test.index, fourier_sarima_mean, label='Fourier SARIMA Forecast', color='orange', linestyle='--', linewidth=2)
plt.fill_between(test.index, fourier_sarima_conf_int.iloc[:, 0], fourier_sarima_conf_int.iloc[:, 1], 
                 color='lightblue', alpha=0.3, label='95% CI')
plt.axvline(test.index[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Fourier SARIMA: Order {auto_model_sarima.order}{auto_model_sarima.seasonal_order}')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 5: Comparison
plt.figure(figsize=(12, 6))
plt.plot(test.index, test, label='Actual Test', color='black', linewidth=3)
plt.plot(test.index, fourier_arima_mean, label='Fourier ARIMA', color='red', linestyle='--', linewidth=2)
plt.plot(test.index, fourier_sarima_mean, label='Fourier SARIMA', color='orange', linestyle='--', linewidth=2)
plt.title('Model Comparison: Actual vs Forecasts')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result5.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 6: Residuals Comparison
plt.figure(figsize=(12, 6))
arima_residuals = test.values - fourier_arima_mean.values
sarima_residuals = test.values - fourier_sarima_mean.values
plt.plot(test.index, arima_residuals, label='Fourier ARIMA Residuals', color='red', alpha=0.7)
plt.plot(test.index, sarima_residuals, label='Fourier SARIMA Residuals', color='orange', alpha=0.7)
plt.axhline(0, color='black', linestyle='--')
plt.title('Residuals Comparison')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result6.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Future Forecasting ---
def forecast_future_fourier(model_result, fourier_features, steps=12, period=52):
    """Forecast future values with Fourier features"""
    # Create future dates
    last_date = prices.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    # Create Fourier features for future periods
    n = len(prices)
    future_t = np.arange(n, n + steps)
    future_fourier = pd.DataFrame(index=future_dates)
    
    for k in range(1, 6):  # 5 harmonics
        future_fourier[f'cos_{k}'] = np.cos(2 * np.pi * k * future_t / period)
        future_fourier[f'sin_{k}'] = np.sin(2 * np.pi * k * future_t / period)
    
    # Forecast
    if hasattr(model_result, 'get_forecast'):
        future_forecast = model_result.get_forecast(steps=steps, exog=future_fourier)
        return future_dates, future_forecast.predicted_mean, future_forecast.conf_int()
    else:
        # For ARIMA models
        future_forecast = model_result.forecast(steps=steps, exog=future_fourier)
        return future_dates, future_forecast, None

# Forecast next 12 weeks with both models
print("\n" + "="*50)
print("FUTURE FORECAST (NEXT 12 WEEKS)")
print("="*50)

try:
    # Fourier ARIMA future forecast
    arima_future_dates, arima_future_prices, arima_future_ci = forecast_future_fourier(
        fourier_arima_result, fourier_features, steps=12
    )
    print("\nFourier ARIMA Future Forecast:")
    for date, price in zip(arima_future_dates, arima_future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
    print(f"Fourier ARIMA future forecasting failed: {e}")

try:
    # Fourier SARIMA future forecast
    sarima_future_dates, sarima_future_prices, sarima_future_ci = forecast_future_fourier(
        fourier_sarima_result, fourier_features, steps=12
    )
    print("\nFourier SARIMA Future Forecast:")
    for date, price in zip(sarima_future_dates, sarima_future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
    print(f"Fourier SARIMA future forecasting failed: {e}")

# --- Model Comparison Summary ---
print("\n" + "="*50)
print("MODEL COMPARISON SUMMARY")
print("="*50)
print(f"{'Metric':<20} {'Fourier ARIMA':<15} {'Fourier SARIMA':<15}")
print("-" * 50)
for metric in ['MSE', 'RMSE', 'MAE', 'MAPE', 'R²', 'Directional Accuracy']:
    print(f"{metric:<20} {fourier_arima_metrics[metric]:<15} {fourier_sarima_metrics[metric]:<15}")

# Determine best model based on RMSE
rmse_arima = float(fourier_arima_metrics['RMSE'])
rmse_sarima = float(fourier_sarima_metrics['RMSE'])

if rmse_arima < rmse_sarima:
    print(f"\nBest Model: Fourier ARIMA (Lower RMSE: {rmse_arima:.4f} vs {rmse_sarima:.4f})")
    best_model = fourier_arima_result
    best_model_name = "Fourier ARIMA"
else:
    print(f"\nBest Model: Fourier SARIMA (Lower RMSE: {rmse_sarima:.4f} vs {rmse_arima:.4f})")
    best_model = fourier_sarima_result
    best_model_name = "Fourier SARIMA"

print(f"\nSelected {best_model_name} as the best performing model")
Using original data length: 722

==================================================
FOURIER ARIMA MODEL
==================================================
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=7916.653, Time=0.81 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=7967.513, Time=0.09 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=7950.813, Time=0.12 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=7948.381, Time=0.15 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=7965.525, Time=0.09 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=7921.770, Time=0.71 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=7921.756, Time=0.60 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=7914.388, Time=1.08 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=7913.030, Time=0.85 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=7932.289, Time=0.17 sec
 ARIMA(4,1,1)(0,0,0)[0] intercept   : AIC=7914.640, Time=1.23 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=7952.034, Time=0.15 sec
 ARIMA(4,1,0)(0,0,0)[0] intercept   : AIC=7930.195, Time=0.21 sec
 ARIMA(4,1,2)(0,0,0)[0] intercept   : AIC=7914.776, Time=1.36 sec
 ARIMA(3,1,1)(0,0,0)[0]             : AIC=7911.076, Time=0.43 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=7919.798, Time=0.35 sec
 ARIMA(3,1,0)(0,0,0)[0]             : AIC=7930.310, Time=0.12 sec
 ARIMA(4,1,1)(0,0,0)[0]             : AIC=7912.686, Time=0.50 sec
 ARIMA(3,1,2)(0,0,0)[0]             : AIC=7912.433, Time=0.57 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=7950.050, Time=0.11 sec
 ARIMA(2,1,2)(0,0,0)[0]             : AIC=7914.694, Time=0.38 sec
 ARIMA(4,1,0)(0,0,0)[0]             : AIC=7928.219, Time=0.15 sec
 ARIMA(4,1,2)(0,0,0)[0]             : AIC=7912.821, Time=0.71 sec

Best model:  ARIMA(3,1,1)(0,0,0)[0]          
Total fit time: 10.950 seconds
Optimal ARIMA order: (3, 1, 1)

FOURIER ARIMA MODEL SUMMARY
==================================================
                               SARIMAX Results                                
==============================================================================
Dep. Variable:         Price (Rs./kg)   No. Observations:                  613
Model:                 ARIMA(3, 1, 1)   Log Likelihood               -3946.556
Date:                Thu, 06 Nov 2025   AIC                           7923.112
Time:                        10:39:45   BIC                           7989.363
Sample:                    01-02-2011   HQIC                          7948.880
                         - 09-25-2022                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
cos_1         24.6779     58.974      0.418      0.676     -90.909     140.265
sin_1         30.0690     56.286      0.534      0.593     -80.249     140.387
cos_2        -21.1797     31.257     -0.678      0.498     -82.442      40.083
sin_2         13.0870     38.290      0.342      0.733     -61.960      88.134
cos_3          8.2423     33.333      0.247      0.805     -57.089      73.573
sin_3         21.0912     28.204      0.748      0.455     -34.188      76.370
cos_4        -15.4877     26.412     -0.586      0.558     -67.254      36.279
sin_4        -17.5556     25.418     -0.691      0.490     -67.374      32.263
cos_5         12.3867     30.087      0.412      0.681     -46.583      71.356
sin_5          8.5902     25.788      0.333      0.739     -41.953      59.133
ar.L1          0.5269      0.047     11.213      0.000       0.435       0.619
ar.L2          0.0601      0.041      1.449      0.147      -0.021       0.141
ar.L3         -0.1457      0.038     -3.813      0.000      -0.221      -0.071
ma.L1         -0.7782      0.043    -18.048      0.000      -0.863      -0.694
sigma2      2.336e+04    637.069     36.673      0.000    2.21e+04    2.46e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):             67889.56
Prob(Q):                              0.94   Prob(JB):                         0.00
Heteroskedasticity (H):              17.58   Skew:                             0.74
Prob(H) (two-sided):                  0.00   Kurtosis:                        54.58
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

==================================================
FOURIER SARIMA MODEL
==================================================
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[26] intercept   : AIC=7913.710, Time=8.81 sec
 ARIMA(0,1,0)(0,0,0)[26] intercept   : AIC=7967.513, Time=0.09 sec
 ARIMA(1,1,0)(1,0,0)[26] intercept   : AIC=7944.651, Time=1.74 sec
 ARIMA(0,1,1)(0,0,1)[26] intercept   : AIC=7942.765, Time=1.64 sec
 ARIMA(0,1,0)(0,0,0)[26]             : AIC=7965.525, Time=0.09 sec
 ARIMA(2,1,2)(0,0,1)[26] intercept   : AIC=7911.713, Time=6.45 sec
 ARIMA(2,1,2)(0,0,0)[26] intercept   : AIC=7916.653, Time=0.73 sec
 ARIMA(2,1,2)(0,0,2)[26] intercept   : AIC=7913.709, Time=15.85 sec
 ARIMA(2,1,2)(1,0,0)[26] intercept   : AIC=7911.772, Time=5.94 sec
 ARIMA(2,1,2)(1,0,2)[26] intercept   : AIC=inf, Time=19.04 sec
 ARIMA(1,1,2)(0,0,1)[26] intercept   : AIC=7916.837, Time=4.65 sec
 ARIMA(2,1,1)(0,0,1)[26] intercept   : AIC=7916.837, Time=4.82 sec
 ARIMA(3,1,2)(0,0,1)[26] intercept   : AIC=7908.360, Time=8.56 sec
 ARIMA(3,1,2)(0,0,0)[26] intercept   : AIC=7914.388, Time=1.07 sec
 ARIMA(3,1,2)(1,0,1)[26] intercept   : AIC=7910.416, Time=9.58 sec
 ARIMA(3,1,2)(0,0,2)[26] intercept   : AIC=7910.351, Time=19.03 sec
 ARIMA(3,1,2)(1,0,0)[26] intercept   : AIC=7908.427, Time=7.13 sec
 ARIMA(3,1,2)(1,0,2)[26] intercept   : AIC=7912.248, Time=21.41 sec
 ARIMA(3,1,1)(0,0,1)[26] intercept   : AIC=7907.233, Time=6.17 sec
 ARIMA(3,1,1)(0,0,0)[26] intercept   : AIC=7913.030, Time=0.91 sec
 ARIMA(3,1,1)(1,0,1)[26] intercept   : AIC=7909.227, Time=8.63 sec
 ARIMA(3,1,1)(0,0,2)[26] intercept   : AIC=7909.224, Time=17.02 sec
 ARIMA(3,1,1)(1,0,0)[26] intercept   : AIC=7907.293, Time=7.49 sec
 ARIMA(3,1,1)(1,0,2)[26] intercept   : AIC=inf, Time=21.27 sec
 ARIMA(3,1,0)(0,0,1)[26] intercept   : AIC=7925.684, Time=2.53 sec
 ARIMA(4,1,1)(0,0,1)[26] intercept   : AIC=7908.743, Time=7.75 sec
 ARIMA(2,1,0)(0,0,1)[26] intercept   : AIC=7946.061, Time=1.80 sec
 ARIMA(4,1,0)(0,0,1)[26] intercept   : AIC=7924.287, Time=2.69 sec
 ARIMA(4,1,2)(0,0,1)[26] intercept   : AIC=7908.950, Time=9.15 sec
 ARIMA(3,1,1)(0,0,1)[26]             : AIC=7905.266, Time=2.35 sec
 ARIMA(3,1,1)(0,0,0)[26]             : AIC=7911.076, Time=0.43 sec
 ARIMA(3,1,1)(1,0,1)[26]             : AIC=7907.259, Time=3.59 sec
 ARIMA(3,1,1)(0,0,2)[26]             : AIC=7907.257, Time=6.77 sec
 ARIMA(3,1,1)(1,0,0)[26]             : AIC=7905.329, Time=2.10 sec
 ARIMA(3,1,1)(1,0,2)[26]             : AIC=7909.257, Time=7.21 sec
 ARIMA(2,1,1)(0,0,1)[26]             : AIC=7914.867, Time=2.15 sec
 ARIMA(3,1,0)(0,0,1)[26]             : AIC=7923.701, Time=1.20 sec
 ARIMA(4,1,1)(0,0,1)[26]             : AIC=7906.776, Time=2.97 sec
 ARIMA(3,1,2)(0,0,1)[26]             : AIC=7906.392, Time=2.55 sec
 ARIMA(2,1,0)(0,0,1)[26]             : AIC=7944.073, Time=0.92 sec
 ARIMA(2,1,2)(0,0,1)[26]             : AIC=7909.742, Time=2.39 sec
 ARIMA(4,1,0)(0,0,1)[26]             : AIC=7922.306, Time=1.48 sec
 ARIMA(4,1,2)(0,0,1)[26]             : AIC=7906.848, Time=4.45 sec

Best model:  ARIMA(3,1,1)(0,0,1)[26]          
Total fit time: 262.643 seconds
Optimal SARIMA order: (3, 1, 1)
Optimal Seasonal order: (0, 0, 1, 26)

FOURIER SARIMA MODEL SUMMARY
==================================================
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                     Price (Rs./kg)   No. Observations:                  613
Model:             SARIMAX(3, 1, 1)x(0, 0, 1, 26)   Log Likelihood               -3769.721
Date:                            Thu, 06 Nov 2025   AIC                           7571.443
Time:                                    10:44:19   BIC                           7641.361
Sample:                                01-02-2011   HQIC                          7598.693
                                     - 09-25-2022                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
cos_1         24.4663     68.118      0.359      0.719    -109.042     157.975
sin_1         30.0717     63.225      0.476      0.634     -93.847     153.990
cos_2        -21.2353     28.584     -0.743      0.458     -77.258      34.787
sin_2         13.0552     36.760      0.355      0.722     -58.994      85.104
cos_3          8.0218     39.527      0.203      0.839     -69.451      85.494
sin_3         21.1340     33.143      0.638      0.524     -43.826      86.094
cos_4        -15.7023     26.415     -0.594      0.552     -67.475      36.071
sin_4        -17.9938     28.764     -0.626      0.532     -74.371      38.384
cos_5         12.5697     35.232      0.357      0.721     -56.483      81.623
sin_5          8.5754     31.656      0.271      0.786     -53.470      70.621
ar.L1          0.5375      0.047     11.446      0.000       0.445       0.630
ar.L2          0.0585      0.042      1.385      0.166      -0.024       0.141
ar.L3         -0.1566      0.040     -3.947      0.000      -0.234      -0.079
ma.L1         -0.7799      0.042    -18.553      0.000      -0.862      -0.697
ma.S.L26      -0.1166      0.025     -4.586      0.000      -0.166      -0.067
sigma2      2.361e+04    745.500     31.664      0.000    2.21e+04    2.51e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):             55910.34
Prob(Q):                              0.93   Prob(JB):                         0.00
Heteroskedasticity (H):              29.99   Skew:                             1.18
Prob(H) (two-sided):                  0.00   Kurtosis:                        50.88
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

FOURIER ARIMA EVALUATION METRICS
==================================================
MSE: 648480.1852
RMSE: 805.2827
MAE: 681.7101
MAPE: 37.97%
R²: -2.2624
Directional Accuracy: 23.15%

FOURIER SARIMA EVALUATION METRICS
==================================================
MSE: 650370.4008
RMSE: 806.4555
MAE: 682.7201
MAPE: 38.03%
R²: -2.2719
Directional Accuracy: 24.07%
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
==================================================
FUTURE FORECAST (NEXT 12 WEEKS)
==================================================

Fourier ARIMA Future Forecast:
2024-11-03: 900.68
2024-11-10: 891.36
2024-11-17: 887.67
2024-11-24: 891.69
2024-12-01: 900.62
2024-12-08: 912.35
2024-12-15: 924.36
2024-12-22: 935.11
2024-12-29: 943.95
2025-01-05: 951.27
2025-01-12: 958.08
2025-01-19: 965.35

Fourier SARIMA Future Forecast:
2024-11-03: 902.34
2024-11-10: 889.00
2024-11-17: 884.43
2024-11-24: 895.70
2024-12-01: 906.19
2024-12-08: 916.03
2024-12-15: 936.92
2024-12-22: 946.38
2024-12-29: 947.46
2025-01-05: 957.19
2025-01-12: 969.24
2025-01-19: 966.35

==================================================
MODEL COMPARISON SUMMARY
==================================================
Metric               Fourier ARIMA   Fourier SARIMA 
--------------------------------------------------
MSE                  648480.1852     650370.4008    
RMSE                 805.2827        806.4555       
MAE                  681.7101        682.7201       
MAPE                 37.97%          38.03%         
R²                   -2.2624         -2.2719        
Directional Accuracy 23.15%          24.07%         

Best Model: Fourier ARIMA (Lower RMSE: 805.2827 vs 806.4555)

Selected Fourier ARIMA as the best performing model
In [19]:
# Plot 5: Comparison
plt.figure(figsize=(12, 6))
plt.plot(test.index, test, label='Actual Test', color='black', linewidth=3)

plt.plot(test.index, fourier_sarima_mean, label='Fourier SARIMA', color='orange', linestyle='--', linewidth=2)
plt.title('Model Comparison: Actual vs Forecasts')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result15.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [20]:
# Plot 5: Comparison
plt.figure(figsize=(12, 6))
plt.plot(test.index, test, label='Actual Test', color='black', linewidth=3)
plt.plot(test.index, fourier_arima_mean, label='Fourier ARIMA', color='red', linestyle='--', linewidth=2)

plt.title('Model Comparison: Actual vs Forecasts')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result51.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image

--- Fourier + LSTM ---¶

--- Import libraries for Fourier +LSTM ----¶

Final model training on full training data (70%) and validation (15%) split¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from scipy.fft import fft, fftfreq
import warnings
warnings.filterwarnings('ignore')

# TensorFlow imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Convert price to Rs./kg
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
prices = df['Price (Rs./kg)'].dropna()

print(f"Using original data length: {len(prices)}")

# --- Step 1: Fourier Feature Engineering ---
def add_fourier_features(prices_series, n_harmonics=5):
    """Add Fourier features based on dominant frequencies"""
    # Convert to numpy array for FFT
    prices_values = prices_series.values
    n = len(prices_values)
    t = np.arange(n)
    
    # Perform FFT to find dominant frequencies
    fft_vals = fft(prices_values)
    fft_freqs = fftfreq(n)
    
    # Get dominant frequencies (excluding DC component and negative frequencies)
    positive_freq_mask = fft_freqs > 0
    positive_freqs = fft_freqs[positive_freq_mask]
    positive_fft_vals = np.abs(fft_vals[positive_freq_mask])
    
    # Get indices of dominant frequencies
    dominant_indices = np.argsort(positive_fft_vals)[::-1][:n_harmonics]
    dom_freqs = positive_freqs[dominant_indices]
    
    fourier_df = pd.DataFrame(index=prices_series.index)
    
    for i, freq in enumerate(dom_freqs):
        if abs(freq) < 1e-10:  # Skip near-zero frequencies
            continue
        fourier_df[f'sin_{i}'] = np.sin(2 * np.pi * freq * t)
        fourier_df[f'cos_{i}'] = np.cos(2 * np.pi * freq * t)
    
    return fourier_df

# Generate Fourier features
fourier_features = add_fourier_features(prices, n_harmonics=5)  # Changed from prices_clean to prices

# Combine original prices with Fourier features
data_with_fourier = pd.concat([prices, fourier_features], axis=1)  # Changed from prices_clean to prices

# --- Step 2: Data Scaling ---
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_with_fourier)

# Separate the scaled data back into features and target
scaled_prices = scaled_data[:, 0]  # First column is the target
scaled_fourier = scaled_data[:, 1:]  # Remaining columns are Fourier features

# --- Step 3: Create Sequences for LSTM ---
def create_sequences(data, target, lookback=52):
    """Create sequences for LSTM training"""
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])  # Lookback window
        y.append(target[i])  # Target value
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks (1 year) lookback period

X, y = create_sequences(scaled_data, scaled_prices, lookback)

# --- Step 4: Train/Test Split ---
split1 = int(0.7 * len(X))
split2 = int(0.85 * len(X))

X_train, X_val, X_test = X[:split1], X[split1:split2], X[split2:]
y_train, y_val, y_test = y[:split1], y[split1:split2], y[split2:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 5: Hyperparameter Tuning ---
def build_model(hp):
    model = Sequential()
    
    # Number of LSTM layers
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(LSTM(
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

# Hyperparameter tuning
print("\n" + "="*50)
print("HYPERPARAMETER TUNING")
print("="*50)

tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=2,
    directory='lstm_tuning',
    project_name='cardamom_forecast'
)

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}")
    print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")

# --- Step 6: Build and Train Final Model ---
print("\n" + "="*50)
print("TRAINING FINAL MODEL")
print("="*50)

# Build final model with best hyperparameters
final_model = tuner.hypermodel.build(best_hp)

# Train the model
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 7: Forecasting ---
# Predict on test set
y_pred = final_model.predict(X_test).flatten()

# Inverse transform predictions
# Create dummy array for inverse transformation
dummy_array = np.zeros((len(y_pred), scaled_data.shape[1]))
dummy_array[:, 0] = y_pred
dummy_array[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_pred)]  # Corresponding Fourier features

# Inverse transform
inverse_transformed = scaler.inverse_transform(dummy_array)
forecast_mean = inverse_transformed[:, 0]

# Get actual values (inverse transformed)
actual_dummy = np.zeros((len(y_test), scaled_data.shape[1]))
actual_dummy[:, 0] = y_test
actual_dummy[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_test)]
actual_inverse = scaler.inverse_transform(actual_dummy)
actual_values = actual_inverse[:, 0]

# --- Step 8: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    actual = np.asarray(actual)
    forecast = np.asarray(forecast)
    
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    
    with np.errstate(divide='ignore', invalid='ignore'):
        ape = np.where(actual != 0, np.abs(actual - forecast) / actual, np.nan)
        mape = np.nanmean(ape) * 100
    
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = np.mean(actual_diff == forecast_diff) * 100
    
    return {
        'MSE': f"{mse:.4f}",
        'RMSE': f"{rmse:.4f}",
        'MAE': f"{mae:.4f}",
        'MAPE': f"{mape:.2f}%" if not np.isnan(mape) else "N/A",
        'R²': f"{r2:.4f}",
        'Directional Accuracy': f"{da:.2f}%"
    }

metrics = evaluate_forecast(actual_values, forecast_mean)

print("\n" + "="*50)
print("MODEL SUMMARY")
print("="*50)
print(f"Lookback period: {lookback} weeks")
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print("\nArchitecture:")
final_model.summary()

print("\n" + "="*50)
print("FORECAST EVALUATION METRICS")
print("="*50)
for metric, value in metrics.items():
    print(f"{metric}: {value}")

plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original Data
plt.figure(figsize=(12, 6))
plt.plot(prices.index, prices, label='Original Data', color='blue', linewidth=2)
plt.title('Original Cardamom Price Data')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Forecast Results
test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)]  # Changed from prices_clean to prices
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:lookback+split2], prices[:lookback+split2], label='Train+Val', color='blue', alpha=0.7)  # Changed from prices_clean to prices
plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier LSTM')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, fourier_features, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        # Predict next value
        prediction = model.predict(current_sequence.reshape(1, lookback, -1), verbose=0)[0, 0]
        
        # Update sequence
        new_row = np.zeros_like(current_sequence[0])
        new_row[0] = prediction  # Price prediction
        new_row[1:] = current_sequence[-1, 1:]  # Keep Fourier features same as last observation
        
        current_sequence = np.vstack([current_sequence[1:], new_row])
        forecasts.append(prediction)
    
    # Inverse transform
    dummy_array = np.zeros((len(forecasts), scaled_data.shape[1]))
    dummy_array[:, 0] = forecasts
    dummy_array[:, 1:] = fourier_features[-len(forecasts):] if len(fourier_features) >= len(forecasts) else fourier_features[-1]
    
    inverse_forecasts = scaler.inverse_transform(dummy_array)[:, 0]
    
    # Create future dates
    last_date = prices.index[-1]  # Changed from prices_clean to prices
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, inverse_forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]  # Last lookback window
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaled_fourier, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST (NEXT 12 WEEKS)")
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
    print(f"Future forecasting failed: {e}")

# --- Additional: Residual Analysis ---
print("\n" + "="*50)
print("RESIDUAL ANALYSIS")
print("="*50)

residuals = actual_values - forecast_mean
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")

# Plot residuals
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('Model Residuals')
plt.grid(True)
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result4.png", dpi=300, bbox_inches='tight')
plt.show()
Using original data length: 722
Training sequences: (468, 52, 11)
Validation sequences: (101, 52, 11)
Test sequences: (101, 52, 11)

==================================================
HYPERPARAMETER TUNING
==================================================
Reloading Tuner from lstm_tuning\cardamom_forecast\tuner0.json

Best Hyperparameters:
Number of LSTM layers: 3
Learning rate: 0.0007999550401225116
LSTM layer 1 units: 192
LSTM layer 1 dropout: 0.1
LSTM layer 2 units: 64
LSTM layer 2 dropout: 0.4
LSTM layer 3 units: 32
LSTM layer 3 dropout: 0.1

==================================================
TRAINING FINAL MODEL
==================================================
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 18s 241ms/step - loss: 0.1027 - mae: 0.2346 - val_loss: 0.0042 - val_mae: 0.0481
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 119ms/step - loss: 0.0175 - mae: 0.0947 - val_loss: 0.0106 - val_mae: 0.0900
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 118ms/step - loss: 0.0124 - mae: 0.0746 - val_loss: 0.0148 - val_mae: 0.1152
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 117ms/step - loss: 0.0132 - mae: 0.0734 - val_loss: 0.0150 - val_mae: 0.1172
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 115ms/step - loss: 0.0156 - mae: 0.0762 - val_loss: 0.0218 - val_mae: 0.1397
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 117ms/step - loss: 0.0102 - mae: 0.0652 - val_loss: 0.0175 - val_mae: 0.1270
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 113ms/step - loss: 0.0098 - mae: 0.0616 - val_loss: 0.0206 - val_mae: 0.1338
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 115ms/step - loss: 0.0100 - mae: 0.0629 - val_loss: 0.0167 - val_mae: 0.1201
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 118ms/step - loss: 0.0092 - mae: 0.0611 - val_loss: 0.0247 - val_mae: 0.1473
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 119ms/step - loss: 0.0113 - mae: 0.0640 - val_loss: 0.0145 - val_mae: 0.1136
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 114ms/step - loss: 0.0097 - mae: 0.0628 - val_loss: 0.0124 - val_mae: 0.1038
4/4 ━━━━━━━━━━━━━━━━━━━━ 2s 349ms/step

==================================================
MODEL SUMMARY
==================================================
Lookback period: 52 weeks
Final epochs trained: 11
Best validation loss: 0.0042
Best validation MAE: 0.0481

Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ lstm (LSTM)                          │ (None, 52, 192)             │         156,672 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout (Dropout)                    │ (None, 52, 192)             │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ lstm_1 (LSTM)                        │ (None, 52, 64)              │          65,792 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_1 (Dropout)                  │ (None, 52, 64)              │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ lstm_2 (LSTM)                        │ (None, 32)                  │          12,416 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_2 (Dropout)                  │ (None, 32)                  │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 112)                 │           3,696 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_3 (Dropout)                  │ (None, 112)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ (None, 32)                  │           3,616 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_4 (Dropout)                  │ (None, 32)                  │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_2 (Dense)                      │ (None, 1)                   │              33 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 726,677 (2.77 MB)
 Trainable params: 242,225 (946.19 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 484,452 (1.85 MB)
==================================================
FORECAST EVALUATION METRICS
==================================================
MSE: 669119.3890
RMSE: 817.9972
MAE: 753.7879
MAPE: 45.13%
R²: -2.8036
Directional Accuracy: 21.00%
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
==================================================
FUTURE FORECAST (NEXT 12 WEEKS)
==================================================
2024-11-03: 1408.34
2024-11-10: 1409.47
2024-11-17: 1409.35
2024-11-24: 1407.97
2024-12-01: 1405.36
2024-12-08: 1401.72
2024-12-15: 1397.19
2024-12-22: 1391.41
2024-12-29: 1385.15
2025-01-05: 1378.57
2025-01-12: 1371.84
2025-01-19: 1365.09

==================================================
RESIDUAL ANALYSIS
==================================================
Residual mean: 753.7879
Residual std: 317.6841
No description has been provided for this image
In [2]:
import matplotlib.pyplot as plt
import numpy as np

# Reconstruct denoised signal from first n_harmonics frequencies
# (This assumes you're using the same FFT approach as earlier)
def denoise_signal_fft(prices, n_harmonics=5):
    n = len(prices)
    fft_vals = fft(prices)
    fft_freqs = fftfreq(n)
    # Zero-out all but the largest frequency components
    denoised_fft = np.zeros_like(fft_vals)
    positive_freq_mask = fft_freqs > 0
    positive_fft_vals = np.abs(fft_vals[positive_freq_mask])
    dominant_indices = np.argsort(positive_fft_vals)[::-1][:n_harmonics]
    # Add back DC component
    denoised_fft[0] = fft_vals[0]
    # Add back n_harmonics dominant frequencies
    denoised_indices = np.where(positive_freq_mask)[0][dominant_indices]
    for idx in denoised_indices:
        denoised_fft[idx] = fft_vals[idx]
        denoised_fft[-idx] = fft_vals[-idx]  # Mirror negative frequencies
    # Inverse FFT to get denoised signal
    denoised_signal = np.real(np.fft.ifft(denoised_fft))
    return denoised_signal

denoised_prices = denoise_signal_fft(prices.values, n_harmonics=5)

# Plot original and denoised series
plt.figure(figsize=(14, 6))
plt.plot(prices.index, prices, label='Original Time Series', color='blue', linewidth=2)
plt.plot(prices.index, denoised_prices, label='Denoised (Fourier, Top 5 Harmonics)', color='red', linewidth=2)
plt.title('Original vs Denoised (Trend) Cardamom Price Time Series')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fourier_denoised_timeseries3.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [19]:
# --- Step 11: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("LSTM MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)

# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")

# Model architecture details
for i, layer in enumerate(final_model.layers):
    print(f"\nLayer {i+1}: {layer.__class__.__name__}")
    if hasattr(layer, "units"):
        print(f"  Units: {layer.units}")
    if hasattr(layer, "activation"):
        print(f"  Activation: {layer.activation.__name__}")
    if hasattr(layer, "rate"):
        print(f"  Dropout Rate: {layer.rate}")
    if hasattr(layer, "return_sequences"):
        print(f"  Return Sequences: {layer.return_sequences}")

# Training summary
print("\nTraining Summary:")
print(f"Total Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")

print("\n" + "="*60)
print("NOTE: This report captures optimizer, learning rate, "
      "activation functions, and all layer details for reproducibility.")
print("="*60)
============================================================
LSTM MODEL CONFIGURATION & TRAINING DETAILS
============================================================
Optimizer: Adam
Learning Rate: 0.0007999550434760749

Layer 1: LSTM
  Units: 192
  Activation: tanh
  Return Sequences: True

Layer 2: Dropout
  Dropout Rate: 0.1

Layer 3: LSTM
  Units: 64
  Activation: tanh
  Return Sequences: True

Layer 4: Dropout
  Dropout Rate: 0.4

Layer 5: LSTM
  Units: 32
  Activation: tanh
  Return Sequences: False

Layer 6: Dropout
  Dropout Rate: 0.1

Layer 7: Dense
  Units: 112
  Activation: relu

Layer 8: Dropout
  Dropout Rate: 0.1

Layer 9: Dense
  Units: 32
  Activation: relu

Layer 10: Dropout
  Dropout Rate: 0.2

Layer 11: Dense
  Units: 1
  Activation: linear

Training Summary:
Total Epochs Trained: 11
Final Training Loss: 0.0087
Final Validation Loss: 0.0183
Final Training MAE: 0.0536
Final Validation MAE: 0.1286

============================================================
NOTE: This report captures optimizer, learning rate, activation functions, and all layer details for reproducibility.
============================================================
In [27]:
# --- Step 9: Visualization ---
plt.figure(figsize=(12, 6))



# Plot 3: Forecast Results
test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)]  # Changed from prices_clean to prices


plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier LSTM')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result66.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image

Fourier + GRU¶

Import Libraries for Fourier + GRU¶

In [70]:
 
In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from scipy.fft import fft, fftfreq
import warnings
warnings.filterwarnings('ignore')

# TensorFlow imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout  # Changed LSTM to GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch

# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
# Convert price to Rs./kg
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
prices = df['Price (Rs./kg)'].dropna()

print(f"Using original data length: {len(prices)}")

# --- Step 1: Fourier Feature Engineering ---
def add_fourier_features(prices_series, n_harmonics=5):
    """Add Fourier features based on dominant frequencies"""
    # Convert to numpy array for FFT
    prices_values = prices_series.values
    n = len(prices_values)
    t = np.arange(n)
    
    # Perform FFT to find dominant frequencies
    fft_vals = fft(prices_values)
    fft_freqs = fftfreq(n)
    
    # Get dominant frequencies (excluding DC component and negative frequencies)
    positive_freq_mask = fft_freqs > 0
    positive_freqs = fft_freqs[positive_freq_mask]
    positive_fft_vals = np.abs(fft_vals[positive_freq_mask])
    
    # Get indices of dominant frequencies
    dominant_indices = np.argsort(positive_fft_vals)[::-1][:n_harmonics]
    dom_freqs = positive_freqs[dominant_indices]
    
    fourier_df = pd.DataFrame(index=prices_series.index)
    
    for i, freq in enumerate(dom_freqs):
        if abs(freq) < 1e-10:  # Skip near-zero frequencies
            continue
        fourier_df[f'sin_{i}'] = np.sin(2 * np.pi * freq * t)
        fourier_df[f'cos_{i}'] = np.cos(2 * np.pi * freq * t)
    
    return fourier_df

# Generate Fourier features
fourier_features = add_fourier_features(prices, n_harmonics=5)

# Combine original prices with Fourier features
data_with_fourier = pd.concat([prices, fourier_features], axis=1)

# --- Step 2: Data Scaling ---
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_with_fourier)

# Separate the scaled data back into features and target
scaled_prices = scaled_data[:, 0]  # First column is the target
scaled_fourier = scaled_data[:, 1:]  # Remaining columns are Fourier features

# --- Step 3: Create Sequences for GRU ---
def create_sequences(data, target, lookback=52):
    """Create sequences for GRU training"""
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])  # Lookback window
        y.append(target[i])  # Target value
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks (1 year) lookback period

X, y = create_sequences(scaled_data, scaled_prices, lookback)

# --- Step 4: Train/Test Split ---
split1 = int(0.7 * len(X))
split2 = int(0.85 * len(X))

X_train, X_val, X_test = X[:split1], X[split1:split2], X[split2:]
y_train, y_val, y_test = y[:split1], y[split1:split2], y[split2:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 5: Hyperparameter Tuning for GRU ---
def build_model(hp):
    model = Sequential()
    
    # Number of GRU layers (changed from LSTM to GRU)
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(GRU(  # Changed LSTM to GRU
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

# Hyperparameter tuning
print("\n" + "="*50)
print("HYPERPARAMETER TUNING FOR GRU")
print("="*50)

tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=2,
    directory='gru_tuning',  # Changed from lstm_tuning to gru_tuning
    project_name='cardamom_forecast_gru'  # Added _gru suffix
)

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}")  # Changed from LSTM to GRU
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}")  # Changed from LSTM to GRU
    print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")  # Changed from LSTM to GRU

# --- Step 6: Build and Train Final GRU Model ---
print("\n" + "="*50)
print("TRAINING FINAL GRU MODEL")  # Changed from LSTM to GRU
print("="*50)

# Build final model with best hyperparameters
final_model = tuner.hypermodel.build(best_hp)

# Train the model
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 7: Forecasting ---
# Predict on test set
y_pred = final_model.predict(X_test).flatten()

# Inverse transform predictions
# Create dummy array for inverse transformation
dummy_array = np.zeros((len(y_pred), scaled_data.shape[1]))
dummy_array[:, 0] = y_pred
dummy_array[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_pred)]  # Corresponding Fourier features

# Inverse transform
inverse_transformed = scaler.inverse_transform(dummy_array)
forecast_mean = inverse_transformed[:, 0]

# Get actual values (inverse transformed)
actual_dummy = np.zeros((len(y_test), scaled_data.shape[1]))
actual_dummy[:, 0] = y_test
actual_dummy[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_test)]
actual_inverse = scaler.inverse_transform(actual_dummy)
actual_values = actual_inverse[:, 0]

# --- Step 8: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    actual = np.asarray(actual)
    forecast = np.asarray(forecast)
    
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    
    with np.errstate(divide='ignore', invalid='ignore'):
        ape = np.where(actual != 0, np.abs(actual - forecast) / actual, np.nan)
        mape = np.nanmean(ape) * 100
    
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = np.mean(actual_diff == forecast_diff) * 100
    
    return {
        'MSE': f"{mse:.4f}",
        'RMSE': f"{rmse:.4f}",
        'MAE': f"{mae:.4f}",
        'MAPE': f"{mape:.2f}%" if not np.isnan(mape) else "N/A",
        'R²': f"{r2:.4f}",
        'Directional Accuracy': f"{da:.2f}%"
    }

metrics = evaluate_forecast(actual_values, forecast_mean)

print("\n" + "="*50)
print("MODEL SUMMARY - FOURIER GRU")  # Changed to GRU
print("="*50)
print(f"Lookback period: {lookback} weeks")
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print("\nArchitecture:")
final_model.summary()

print("\n" + "="*50)
print("FORECAST EVALUATION METRICS")
print("="*50)
for metric, value in metrics.items():
    print(f"{metric}: {value}")

# --- Step 9: Visualization ---
plt.figure(figsize=(16, 12))

# Plot 1: Training history
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('GRU Model Training History')  # Changed to GRU
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original Data
plt.figure(figsize=(12, 6))
plt.plot(prices.index, prices, label='Original Data', color='blue', linewidth=2)
plt.title('Original Cardamom Price Data')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Forecast Results
test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)]
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:lookback+split2], prices[:lookback+split2], label='Train+Val', color='blue', alpha=0.7)
plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier GRU')  # Changed to GRU
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result2.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, fourier_features, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        # Predict next value
        prediction = model.predict(current_sequence.reshape(1, lookback, -1), verbose=0)[0, 0]
        
        # Update sequence
        new_row = np.zeros_like(current_sequence[0])
        new_row[0] = prediction  # Price prediction
        new_row[1:] = current_sequence[-1, 1:]  # Keep Fourier features same as last observation
        
        current_sequence = np.vstack([current_sequence[1:], new_row])
        forecasts.append(prediction)
    
    # Inverse transform
    dummy_array = np.zeros((len(forecasts), scaled_data.shape[1]))
    dummy_array[:, 0] = forecasts
    dummy_array[:, 1:] = fourier_features[-len(forecasts):] if len(fourier_features) >= len(forecasts) else fourier_features[-1]
    
    inverse_forecasts = scaler.inverse_transform(dummy_array)[:, 0]
    
    # Create future dates
    last_date = prices.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, inverse_forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]  # Last lookback window
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaled_fourier, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST (NEXT 12 WEEKS)")
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
    print(f"Future forecasting failed: {e}")

# --- Additional: Residual Analysis ---
print("\n" + "="*50)
print("RESIDUAL ANALYSIS")
print("="*50)

residuals = actual_values - forecast_mean
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")

# Plot residuals
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('GRU Model Residuals')  # Changed to GRU
plt.grid(True)
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Additional: Compare GRU performance benefits ---
print("\n" + "="*50)
print("GRU vs LSTM COMPARISON")
print("="*50)
print("GRU Advantages:")
print("- Fewer parameters (faster training)")
print("- Simpler architecture (2 gates vs LSTM's 3 gates)")
print("- Better performance on smaller datasets")
print("- Less prone to overfitting")
print("- More efficient memory usage")
Using original data length: 722
Training sequences: (468, 52, 11)
Validation sequences: (101, 52, 11)
Test sequences: (101, 52, 11)

==================================================
HYPERPARAMETER TUNING FOR GRU
==================================================
Reloading Tuner from gru_tuning\cardamom_forecast_gru\tuner0.json

Best Hyperparameters:
Number of GRU layers: 1
Learning rate: 0.00349041012762722
GRU layer 1 units: 160
GRU layer 1 dropout: 0.5

==================================================
TRAINING FINAL GRU MODEL
==================================================
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 6s 113ms/step - loss: 0.1296 - mae: 0.2756 - val_loss: 0.0297 - val_mae: 0.1638
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0246 - mae: 0.1162 - val_loss: 0.0252 - val_mae: 0.1507
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0167 - mae: 0.0902 - val_loss: 0.0327 - val_mae: 0.1769
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0161 - mae: 0.0875 - val_loss: 0.0232 - val_mae: 0.1484
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0131 - mae: 0.0767 - val_loss: 0.0224 - val_mae: 0.1445
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0127 - mae: 0.0795 - val_loss: 0.0251 - val_mae: 0.1528
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0108 - mae: 0.0709 - val_loss: 0.0256 - val_mae: 0.1564
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0109 - mae: 0.0670 - val_loss: 0.0134 - val_mae: 0.1117
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0149 - mae: 0.0730 - val_loss: 0.0131 - val_mae: 0.1102
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0143 - mae: 0.0775 - val_loss: 0.0148 - val_mae: 0.1156
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0101 - mae: 0.0624 - val_loss: 0.0132 - val_mae: 0.1097
Epoch 12/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0083 - mae: 0.0575 - val_loss: 0.0142 - val_mae: 0.1151
Epoch 13/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0125 - mae: 0.0630 - val_loss: 0.0129 - val_mae: 0.1085
Epoch 14/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0075 - mae: 0.0547 - val_loss: 0.0202 - val_mae: 0.1372
Epoch 15/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0091 - mae: 0.0584 - val_loss: 0.0121 - val_mae: 0.1057
Epoch 16/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0087 - mae: 0.0556 - val_loss: 0.0202 - val_mae: 0.1365
Epoch 17/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0101 - mae: 0.0536 - val_loss: 0.0154 - val_mae: 0.1197
Epoch 18/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 40ms/step - loss: 0.0076 - mae: 0.0503 - val_loss: 0.0162 - val_mae: 0.1222
Epoch 19/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0075 - mae: 0.0524 - val_loss: 0.0125 - val_mae: 0.1080
Epoch 20/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0081 - mae: 0.0529 - val_loss: 0.0101 - val_mae: 0.0962
Epoch 21/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0105 - mae: 0.0601 - val_loss: 0.0128 - val_mae: 0.1082
Epoch 22/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0069 - mae: 0.0503 - val_loss: 0.0129 - val_mae: 0.1073
Epoch 23/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0059 - mae: 0.0479 - val_loss: 0.0068 - val_mae: 0.0786
Epoch 24/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0085 - mae: 0.0555 - val_loss: 0.0090 - val_mae: 0.0903
Epoch 25/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0070 - mae: 0.0502 - val_loss: 0.0094 - val_mae: 0.0926
Epoch 26/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0071 - mae: 0.0522 - val_loss: 0.0183 - val_mae: 0.1245
Epoch 27/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0072 - mae: 0.0509 - val_loss: 0.0148 - val_mae: 0.1150
Epoch 28/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0063 - mae: 0.0491 - val_loss: 0.0099 - val_mae: 0.0953
Epoch 29/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0056 - mae: 0.0472 - val_loss: 0.0135 - val_mae: 0.1104
Epoch 30/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0078 - mae: 0.0528 - val_loss: 0.0173 - val_mae: 0.1262
Epoch 31/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0069 - mae: 0.0513 - val_loss: 0.0175 - val_mae: 0.1276
Epoch 32/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0077 - mae: 0.0532 - val_loss: 0.0090 - val_mae: 0.0906
Epoch 33/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0069 - mae: 0.0512 - val_loss: 0.0094 - val_mae: 0.0926
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 111ms/step

==================================================
MODEL SUMMARY - FOURIER GRU
==================================================
Lookback period: 52 weeks
Final epochs trained: 33
Best validation loss: 0.0068
Best validation MAE: 0.0786

Architecture:
Model: "sequential_3"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ gru_2 (GRU)                          │ (None, 160)                 │          83,040 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_10 (Dropout)                 │ (None, 160)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_6 (Dense)                      │ (None, 112)                 │          18,032 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_11 (Dropout)                 │ (None, 112)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_7 (Dense)                      │ (None, 1)                   │             113 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 303,557 (1.16 MB)
 Trainable params: 101,185 (395.25 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 202,372 (790.52 KB)
==================================================
FORECAST EVALUATION METRICS
==================================================
MSE: 520635.0803
RMSE: 721.5505
MAE: 661.0315
MAPE: 39.48%
R²: -1.9596
Directional Accuracy: 20.00%
<Figure size 1600x1200 with 0 Axes>
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
==================================================
FUTURE FORECAST (NEXT 12 WEEKS)
==================================================
2024-11-03: 1181.33
2024-11-10: 917.99
2024-11-17: 827.83
2024-11-24: 800.37
2024-12-01: 791.48
2024-12-08: 789.89
2024-12-15: 790.28
2024-12-22: 790.75
2024-12-29: 790.94
2025-01-05: 790.95
2025-01-12: 790.90
2025-01-19: 790.86

==================================================
RESIDUAL ANALYSIS
==================================================
Residual mean: 661.0315
Residual std: 289.2620
No description has been provided for this image
==================================================
GRU vs LSTM COMPARISON
==================================================
GRU Advantages:
- Fewer parameters (faster training)
- Simpler architecture (2 gates vs LSTM's 3 gates)
- Better performance on smaller datasets
- Less prone to overfitting
- More efficient memory usage
In [23]:
# --- Step 11: Detailed Model Configuration Report & JSON Export ---
import json
import inspect
from tensorflow.keras import backend as K

print("\n" + "="*60)
print("FOURIER + GRU MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)

report = {}

# Optimizer details (robust handling for TensorFlow objects)
try:
    opt = final_model.optimizer
    opt_name = opt.__class__.__name__
    # learning_rate may be a schedule/variable; get numeric if possible
    try:
        lr_val = float(K.get_value(opt.learning_rate))
    except Exception:
        # fallback to config if present
        opt_cfg = opt.get_config()
        lr_val = opt_cfg.get('learning_rate', None)
        try:
            lr_val = float(lr_val)
        except Exception:
            pass
    report['optimizer'] = opt_name
    report['learning_rate'] = lr_val
    print(f"Optimizer: {opt_name}")
    print(f"Learning Rate: {lr_val}")
except Exception as e:
    report['optimizer'] = str(e)
    print(f"Could not fetch optimizer details: {e}")

# Hyperparameter info (if tuner/best_hp available)
try:
    report['best_hyperparameters'] = best_hp.values
    print("\nBest Hyperparameters (from tuner):")
    for k, v in best_hp.values.items():
        print(f"  {k}: {v}")
except Exception:
    # best_hp may not be available in some contexts
    try:
        # If tuner exists and has get_best_hyperparameters
        best = tuner.get_best_hyperparameters(num_trials=1)[0]
        report['best_hyperparameters'] = best.values
        print("\nBest Hyperparameters (from tuner):")
        for k, v in best.values.items():
            print(f"  {k}: {v}")
    except Exception:
        report['best_hyperparameters'] = None

# Model architecture details
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
    layer_info = {}
    layer_info['index'] = i + 1
    layer_info['class_name'] = layer.__class__.__name__
    layer_info['name'] = layer.name
    # Units (for recurrent / dense)
    if hasattr(layer, 'units'):
        try:
            layer_info['units'] = int(layer.units)
            print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer.units}")
        except Exception:
            layer_info['units'] = str(getattr(layer, 'units', None))
            print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
    else:
        print(f" Layer {i+1}: {layer.__class__.__name__}")
    # Activation
    if hasattr(layer, 'activation'):
        try:
            layer_info['activation'] = layer.activation.__name__
            print(f"    activation: {layer_info['activation']}")
        except Exception:
            layer_info['activation'] = str(getattr(layer, 'activation', None))
            print(f"    activation: {layer_info['activation']}")
    # Dropout rate
    if hasattr(layer, 'rate'):
        try:
            layer_info['dropout_rate'] = float(layer.rate)
            print(f"    dropout_rate: {layer_info['dropout_rate']}")
        except Exception:
            layer_info['dropout_rate'] = str(getattr(layer, 'rate', None))
            print(f"    dropout_rate: {layer_info['dropout_rate']}")
    # return_sequences
    if hasattr(layer, 'return_sequences'):
        try:
            layer_info['return_sequences'] = bool(layer.return_sequences)
            print(f"    return_sequences: {layer_info['return_sequences']}")
        except Exception:
            layer_info['return_sequences'] = str(getattr(layer, 'return_sequences', None))
            print(f"    return_sequences: {layer_info['return_sequences']}")
    # input/output shapes (if available)
    try:
        layer_info['input_shape'] = layer.input_shape
        layer_info['output_shape'] = layer.output_shape
    except Exception:
        layer_info['input_shape'] = None
        layer_info['output_shape'] = None

    layers_report.append(layer_info)

report['layers'] = layers_report

# Training summary
training_summary = {}
training_summary['lookback'] = int(lookback) if 'lookback' in globals() else None
training_summary['epochs_trained'] = len(history.history['loss'])
training_summary['final_training_loss'] = float(history.history['loss'][-1])
training_summary['final_validation_loss'] = float(history.history['val_loss'][-1]) if history.history.get('val_loss') else None
# try to capture MAE if present
training_summary['final_training_mae'] = float(history.history.get('mae')[-1]) if history.history.get('mae') else None
training_summary['final_validation_mae'] = float(history.history.get('val_mae')[-1]) if history.history.get('val_mae') else None

print("\nTraining Summary:")
print(f" Lookback period: {training_summary['lookback']} weeks")
print(f" Epochs Trained: {training_summary['epochs_trained']}")
print(f" Final Training Loss: {training_summary['final_training_loss']:.6f}")
if training_summary['final_validation_loss'] is not None:
    print(f" Final Validation Loss: {training_summary['final_validation_loss']:.6f}")
if training_summary['final_training_mae'] is not None:
    print(f" Final Training MAE: {training_summary['final_training_mae']:.6f}")
if training_summary['final_validation_mae'] is not None:
    print(f" Final Validation MAE: {training_summary['final_validation_mae']:.6f}")

report['training_summary'] = training_summary

# Evaluation metrics (from earlier)
try:
    report['evaluation_metrics'] = metrics
    print("\nEvaluation Metrics:")
    for k, v in metrics.items():
        print(f" {k}: {v}")
except Exception as e:
    report['evaluation_metrics'] = None
    print(f"Could not attach evaluation metrics: {e}")

# Residual stats
try:
    residuals_stats = {
        'residual_mean': float(np.mean(residuals)),
        'residual_std': float(np.std(residuals)),
        'residual_min': float(np.min(residuals)),
        'residual_max': float(np.max(residuals))
    }
    report['residuals'] = residuals_stats
    print("\nResiduals:")
    print(f" Mean: {residuals_stats['residual_mean']:.6f}")
    print(f" Std : {residuals_stats['residual_std']:.6f}")
    print(f" Min : {residuals_stats['residual_min']:.6f}")
    print(f" Max : {residuals_stats['residual_max']:.6f}")
except Exception as e:
    print(f"Could not compute residual stats: {e}")

# Timestamp & environment info
import datetime, platform
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__

# Save JSON report
report_filename = "fourier_gru_report.json"
try:
    with open(report_filename, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2, ensure_ascii=False)
    print(f"\nSaved detailed report to: {report_filename}")
except Exception as e:
    print(f"Failed to save JSON report: {e}")

print("\n" + "="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================
FOURIER + GRU MODEL CONFIGURATION & TRAINING REPORT
============================================================
Optimizer: Adam
Learning Rate: 0.0034904100466519594

Best Hyperparameters (from tuner):
  num_layers: 1
  units_0: 160
  dropout_0: 0.5
  dense_layers: 1
  learning_rate: 0.00349041012762722
  units_1: 192
  dropout_1: 0.4
  units_2: 32
  dropout_2: 0.4
  dense_units_0: 112
  dense_dropout_0: 0.5
  dense_units_1: 16
  dense_dropout_1: 0.4

Model Layers:
 Layer 1: GRU - units: 160
    activation: tanh
    return_sequences: False
 Layer 2: Dropout
    dropout_rate: 0.5
 Layer 3: Dense - units: 112
    activation: relu
 Layer 4: Dropout
    dropout_rate: 0.5
 Layer 5: Dense - units: 1
    activation: linear

Training Summary:
 Lookback period: 52 weeks
 Epochs Trained: 26
 Final Training Loss: 0.007103
 Final Validation Loss: 0.015748
 Final Training MAE: 0.049101
 Final Validation MAE: 0.118491

Evaluation Metrics:
 MSE: 475207.6184
 RMSE: 689.3530
 MAE: 628.8056
 MAPE: 37.27%
 R²: -1.7013
 Directional Accuracy: 19.00%

Residuals:
 Mean: 628.805635
 Std : 282.508569
 Min : 59.539413
 Max : 1762.257271

Saved detailed report to: fourier_gru_report.json

============================================================
REPORT COMPLETE
============================================================
In [29]:
# Plot 3: Forecast Results
# --- Step 9: Visualization ---
plt.figure(figsize=(12, 6))

test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)]


plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier GRU')  # Changed to GRU
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flgr_result4.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image

Wavelet + ARIMA¶

Import Libraries for Wavelet + ARIMA¶

In [33]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA   # 👈 ADD THIS
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from itertools import product
from sklearn.preprocessing import MinMaxScaler

# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

# Perform DWT decomposition with db4 at level 3
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)

# Denoise by keeping only the approximation coefficients (remove high-frequency details)
denoised_coeffs = [coeffs[0]] + [np.zeros_like(c) for c in coeffs[1:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]  # Ensure same length

# Alternatively, you can keep some of the detail coefficients if they contain meaningful information
# For example, keep the first level detail:
# denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
# denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]

# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original')
plt.plot(df.index, denoised_data, label='Denoised', linestyle='--')
plt.title("Original vs Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/warima_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size


# ARIMA Hyperparameter tuning
param_grid = {
    'order': [(0,1,1), (1,1,1), (2,1,1), (0,1,0), (1,1,0), (2,0,0), (3,0,1),(0,1,1), (1,1,1), (2,1,1), (0,1,0), (1,1,0), (2,0,0), (3,0,1),(4,1,0),(2,0,1),(2,0,2),(3,0,7),(0,0,1),(0,1,2)]
}

best_score = float('inf')
best_params = None
best_model = None

print("Training ARIMA on denoised data...")

for order in param_grid['order']:
    try:
        model = ARIMA(denoised_data[:train_size], order=order)
        model_fit = model.fit()
        val_pred = model_fit.forecast(steps=val_size)
        score = mean_squared_error(
            denoised_data[train_size:train_size+val_size], val_pred
        )

        if score < best_score:
            best_score = score
            best_params = {'order': order}
            best_model = model_fit

    except Exception as e:
        print(f"Failed for {order}: {e}")
        continue

if best_params is None:
    raise ValueError("No ARIMA model could be fitted. Try expanding param_grid.")

print(f"Best params: {best_params} with MSE: {best_score:.4f}")

# Final model
final_model = ARIMA(
    denoised_data[:train_size+val_size], 
    order=best_params['order']
)
final_model_fit = final_model.fit()

# Make predictions
test_predictions = final_model_fit.forecast(steps=test_size)

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Evaluation metrics
y_true = data[train_size+val_size:]
y_pred = test_predictions

mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, y_pred)
mape = mean_absolute_percentage_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# Calculate TSS, RSS, ESS
y_mean = np.mean(y_true)
tss = np.sum((y_true - y_mean) ** 2)
rss = np.sum((y_true - y_pred) ** 2)
ess = tss - rss   # Explained Sum of Squares

# Directional Accuracy (DA)
true_diff = np.diff(y_true)
pred_diff = np.diff(y_pred)
da = np.mean(np.sign(true_diff) == np.sign(pred_diff)) * 100

print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}%")
print(f"TSS: {tss:.4f}")
print(f"RSS: {rss:.4f}")
print(f"ESS: {ess:.4f}")
print(f"R²: {r2:.4f}")
print(f"Directional Accuracy (DA): {da:.2f}%")

# Create a DataFrame for comparison
results_df = pd.DataFrame({
    "Date": df.index[train_size+val_size:],
    "Actual": y_true,
    "Predicted": y_pred
})

print("\nActual vs Predicted Values:")
print(results_df.head(20))  # Show first 20 rows (change as needed)
    
# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], y_true, label='Actual')
plt.plot(df.index[train_size+val_size:], y_pred, label='Predicted', linestyle='--')
plt.title("Wavelet-SARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/warima_result2.png", dpi=300, bbox_inches='tight')
plt.show()



# Model diagnostics
plt.figure(figsize=(12, 6))
print("\nSARIMA Model Summary:")
print(final_model_fit.summary())
final_model_fit.plot_diagnostics(figsize=(12, 8))
plt.suptitle("Model Diagnostics", y=1.02)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/warima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
Training ARIMA on denoised data...
Best params: {'order': (2, 0, 2)} with MSE: 19541.3318

Final Evaluation Metrics:
MSE: 600053.5119
RMSE: 774.6312
MAE: 653.1152
MAPE: 0.3622%
TSS: 21666634.8624
RSS: 65405832.7919
ESS: -43739197.9295
R²: -2.0187
Directional Accuracy (DA): 24.07%

Actual vs Predicted Values:
         Date  Actual   Predicted
0  2022-10-02   975.0  926.366548
1  2022-10-09   975.0  923.112194
2  2022-10-16   850.0  920.442949
3  2022-10-23   900.0  918.283332
4  2022-10-30   900.0  916.567174
5  2022-11-06   900.0  915.236476
6  2022-11-13   900.0  914.240411
7  2022-11-20   900.0  913.534442
8  2022-11-27   900.0  913.079556
9  2022-12-04   850.0  912.841581
10 2022-12-11   850.0  912.790599
11 2022-12-18   875.0  912.900420
12 2022-12-25   875.0  913.148127
13 2023-01-01   875.0  913.513671
14 2023-01-08   820.0  913.979524
15 2023-01-15   925.0  914.530365
16 2023-01-22   825.0  915.152813
17 2023-01-29   950.0  915.835183
18 2023-02-05   950.0  916.567282
19 2023-02-12   950.0  917.340223
No description has been provided for this image
SARIMA Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  613
Model:                 ARIMA(2, 0, 2)   Log Likelihood               -2067.804
Date:                Thu, 06 Nov 2025   AIC                           4147.608
Time:                        10:55:45   BIC                           4174.119
Sample:                             0   HQIC                          4157.918
                                - 613                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        972.3970    320.186      3.037      0.002     344.844    1599.950
ar.L1          1.8567      0.013    143.524      0.000       1.831       1.882
ar.L2         -0.8592      0.013    -65.286      0.000      -0.885      -0.833
ma.L1          0.9831      0.520      1.892      0.059      -0.035       2.002
ma.L2         -0.0168      0.037     -0.455      0.649      -0.089       0.055
sigma2        48.4313     25.221      1.920      0.055      -1.001      97.863
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):              6959.44
Prob(Q):                              0.98   Prob(JB):                         0.00
Heteroskedasticity (H):               9.98   Skew:                             1.32
Prob(H) (two-sided):                  0.00   Kurtosis:                        19.29
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<Figure size 1200x600 with 0 Axes>
No description has been provided for this image
In [ ]:
 

--- Search for SARIMA Parameters (Grid Search + Time Series CV) ---¶

Fit SARIMA¶

In [35]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from itertools import product
from sklearn.preprocessing import MinMaxScaler

# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

# Perform DWT decomposition with db4 at level 3
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)

# Denoise by keeping only the approximation coefficients (remove high-frequency details)
denoised_coeffs = [coeffs[0]] + [np.zeros_like(c) for c in coeffs[1:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]  # Ensure same length

# Alternatively, you can keep some of the detail coefficients if they contain meaningful information
# For example, keep the first level detail:
# denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
# denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]

# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original')
plt.plot(df.index, denoised_data, label='Denoised', linestyle='--')
plt.title("Original vs Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wsarima_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

# SARIMA Hyperparameter tuning (simplified)
seasonal_period = 26  # Adjust based on your data's seasonality

# Reduced parameter grid for faster training
param_grid = {
    'order': [ (0,1,0)],  # Common ARIMA orders
    'seasonal_order': [ (1,1,1,seasonal_period)]  # Common seasonal orders
}

best_score = float('inf')
best_params = None
best_model = None

print("Training SARIMA on denoised data...")

for order, seasonal_order in product(param_grid['order'], param_grid['seasonal_order']):
    try:
        model = SARIMAX(denoised_data[:train_size], 
                       order=order,
                       seasonal_order=seasonal_order,
                       enforce_stationarity=False,
                       enforce_invertibility=False)
        
        model_fit = model.fit(disp=False)
        val_pred = model_fit.forecast(steps=val_size)
        score = mean_squared_error(denoised_data[train_size:train_size+val_size], val_pred)
        
        if score < best_score:
            best_score = score
            best_params = {'order': order, 'seasonal_order': seasonal_order}
            best_model = model_fit
            
    except Exception as e:
        continue

print(f"Best params: {best_params} with MSE: {best_score:.4f}")

# Final model training on train+validation
final_model = SARIMAX(denoised_data[:train_size+val_size], 
                     order=best_params['order'],
                     seasonal_order=best_params['seasonal_order'],
                     enforce_stationarity=False,
                     enforce_invertibility=False)
final_model_fit = final_model.fit(disp=False)

# Make predictions
test_predictions = final_model_fit.forecast(steps=test_size)

# ... your existing code for making predictions ...

# Extract the actual test values and the predictions
y_actual = data[train_size+val_size:]
y_pred = test_predictions

# Ensure they are the same length (a good practice)
min_length = min(len(y_actual), len(y_pred))
y_actual = y_actual[:min_length]
y_pred = y_pred[:min_length]

# Standard Regression Metrics
mse = mean_squared_error(y_actual, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_actual, y_pred)
mape = mean_absolute_percentage_error(y_actual, y_pred)
r2 = r2_score(y_actual, y_pred)

# Sum of Squares Calculations
mean_actual = np.mean(y_actual)
tss = np.sum((y_actual - mean_actual) ** 2)  # Total Sum of Squares
rss = np.sum((y_actual - y_pred) ** 2)       # Residual Sum of Squares
ess = np.sum((y_pred - mean_actual) ** 2)    # Explained Sum of Squares
# Note: TSS should equal RSS + ESS (within floating-point precision)

# Directional Accuracy (DA) Calculation
# First, calculate the actual change and the predicted change
actual_changes = np.diff(y_actual)  # e.g., [y1-y0, y2-y1, ...]
predicted_changes = np.diff(y_pred) # e.g., [y_hat1-y_hat0, y_hat2-y_hat1, ...]

# Check if the direction (sign) of the change is the same
correct_direction = np.sign(actual_changes) == np.sign(predicted_changes)

# Calculate the percentage of correct directions
# We use the first value of the differences, so the number of predictions is len(actual_changes)
da = (np.sum(correct_direction) / len(actual_changes)) * 100

print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}%")
print(f"TSS: {tss:.4f}")
print(f"RSS: {rss:.4f}")
print(f"ESS: {ess:.4f}")
print(f"R²: {r2:.4f}")
print(f"Directional Accuracy (DA): {da:.2f}%")
# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], data[train_size+val_size:], label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', linestyle='--')
plt.title("Wavelet-SARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wsarima_result2.png", dpi=300, bbox_inches='tight')
plt.show()

# Model diagnostics
print("\nSARIMA Model Summary:")
print(final_model_fit.summary())
final_model_fit.plot_diagnostics(figsize=(12, 8))
plt.suptitle("Model Diagnostics", y=1.02)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wsarima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
Training SARIMA on denoised data...
Best params: {'order': (0, 1, 0), 'seasonal_order': (1, 1, 1, 26)} with MSE: 584371.4103

Final Evaluation Metrics:
MSE: 644062.8720
RMSE: 802.5353
MAE: 677.0230
MAPE: 0.3750%
TSS: 21666634.8624
RSS: 70202853.0449
ESS: 48901362.6727
R²: -2.2401
Directional Accuracy (DA): 26.85%
No description has been provided for this image
SARIMA Model Summary:
                                      SARIMAX Results                                       
============================================================================================
Dep. Variable:                                    y   No. Observations:                  613
Model:             SARIMAX(0, 1, 0)x(1, 1, [1], 26)   Log Likelihood               -2655.903
Date:                              Thu, 06 Nov 2025   AIC                           5317.806
Time:                                      10:56:56   BIC                           5330.784
Sample:                                           0   HQIC                          5322.874
                                              - 613                                         
Covariance Type:                                opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.S.L26       0.0384      0.026      1.453      0.146      -0.013       0.090
ma.S.L26      -1.0000     72.287     -0.014      0.989    -142.679     140.679
sigma2       689.6068   4.99e+04      0.014      0.989   -9.71e+04    9.84e+04
===================================================================================
Ljung-Box (L1) (Q):                 481.69   Jarque-Bera (JB):              1714.59
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):              15.92   Skew:                             0.71
Prob(H) (two-sided):                  0.00   Kurtosis:                        11.46
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image

Wavelet +LSTM¶

Import Libraries for Wavelet +LSTM¶

In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf

# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

print(f"Original data length: {len(data)}")

# --- Step 1: Wavelet Decomposition ---
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)

# Keep approximation coefficients and first level details for meaningful information
denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]

# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, denoised_data, label='Wavelet Denoised', linewidth=2)
plt.title("Original vs Wavelet-Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 2: Data Preparation for LSTM ---
# Use denoised data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(denoised_data.reshape(-1, 1))

# Create sequences for LSTM
def create_sequences(data, lookback=52):
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])
        y.append(data[i])
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)

# Reshape for LSTM [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 3: Hyperparameter Tuning ---
def build_model(hp):
    model = Sequential()
    
    # Number of LSTM layers
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(LSTM(
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers with ReLU activation
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'  # Using ReLU activation
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

print("\nStarting hyperparameter tuning...")
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=2,
    directory='wavelet_lstm_tuning',
    project_name='cardamom_wavelet_lstm'
)

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}")
    print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")

# --- Step 4: Build and Train Final Model ---
final_model = tuner.hypermodel.build(best_hp)

print("\nTraining final model...")
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()

# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_denoised = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()

# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    mape = mean_absolute_percentage_error(actual, forecast)
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
    
    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R²': r2,
        'Directional Accuracy': da
    }

# Evaluate on denoised data
metrics_denoised = evaluate_forecast(y_actual_denoised, y_pred)

# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)

print("\n" + "="*60)
print("WAVELET-LSTM MODEL TRAINING SUMMARY")
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Wavelet used: {wavelet} level {level}")
print("\nModel Architecture:")
final_model.summary()

print("\n" + "="*60)
print("EVALUATION ON DENOISED DATA")
print("="*60)
for metric, value in metrics_denoised.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]



# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Wavelet-LSTM Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Denoised vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, denoised_data, label='Wavelet Denoised', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs Wavelet-Denoised vs Forecast')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates, 
                 y_pred - metrics_original['RMSE'], 
                 y_pred + metrics_original['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 8: Residual Analysis ---
residuals = y_actual_original - y_pred

plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('Residuals Over Time')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('Residual Distribution')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result4.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result6.png", dpi=300, bbox_inches='tight')
plt.show()

print("\nResidual Analysis:")
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")

# --- Step 9: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
        forecasts.append(prediction)
        
        # Update sequence
        new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
        current_sequence = new_sequence
    
    # Inverse transform
    forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
    
    # Create future dates
    last_date = df.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST (NEXT 12 WEEKS)")
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
        
    # Plot future forecast
    plt.figure(figsize=(12, 6))
    plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
    plt.plot(future_dates, future_prices, label='Future Forecast', color='red', linestyle='--', linewidth=2)
    plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
    plt.title('Future Price Forecast (Next 12 Weeks)')
    plt.xlabel('Date')
    plt.ylabel('Price (Rs./kg)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/wlst_result7.png", dpi=300, bbox_inches='tight')
    plt.show()
    
except Exception as e:
    print(f"Future forecasting failed: {e}")
C:\Users\marti\AppData\Local\Temp\ipykernel_24668\1254446090.py:12: DeprecationWarning: `import kerastuner` is deprecated, please use `import keras_tuner`.
  from kerastuner.tuners import RandomSearch
Original data length: 722
No description has been provided for this image
Training sequences: (468, 52, 1)
Validation sequences: (100, 52, 1)
Test sequences: (102, 52, 1)

Starting hyperparameter tuning...
Reloading Tuner from wavelet_lstm_tuning\cardamom_wavelet_lstm\tuner0.json

Best Hyperparameters:
Number of LSTM layers: 1
Learning rate: 0.003792091345898107
LSTM layer 1 units: 192
LSTM layer 1 dropout: 0.4

Training final model...
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 5s 65ms/step - loss: 0.0645 - mae: 0.1601 - val_loss: 0.0049 - val_mae: 0.0622
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0083 - mae: 0.0580 - val_loss: 9.0763e-04 - val_mae: 0.0205
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0077 - mae: 0.0535 - val_loss: 8.9447e-04 - val_mae: 0.0229
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0061 - mae: 0.0517 - val_loss: 8.8476e-04 - val_mae: 0.0218
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0056 - mae: 0.0414 - val_loss: 8.5948e-04 - val_mae: 0.0221
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0062 - mae: 0.0453 - val_loss: 7.6791e-04 - val_mae: 0.0205
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0052 - mae: 0.0379 - val_loss: 0.0013 - val_mae: 0.0291
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0035 - mae: 0.0355 - val_loss: 0.0022 - val_mae: 0.0406
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0071 - mae: 0.0555 - val_loss: 0.0014 - val_mae: 0.0315
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0055 - mae: 0.0477 - val_loss: 7.8204e-04 - val_mae: 0.0203
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0048 - mae: 0.0371 - val_loss: 4.8616e-04 - val_mae: 0.0146
Epoch 12/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0037 - mae: 0.0327 - val_loss: 5.4265e-04 - val_mae: 0.0155
Epoch 13/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0049 - mae: 0.0387 - val_loss: 7.9955e-04 - val_mae: 0.0234
Epoch 14/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0034 - mae: 0.0333 - val_loss: 5.1381e-04 - val_mae: 0.0161
Epoch 15/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0032 - mae: 0.0308 - val_loss: 7.3843e-04 - val_mae: 0.0218
Epoch 16/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0029 - mae: 0.0315 - val_loss: 6.6110e-04 - val_mae: 0.0211
Epoch 17/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0033 - mae: 0.0363 - val_loss: 8.3089e-04 - val_mae: 0.0242
Epoch 18/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0038 - mae: 0.0356 - val_loss: 4.2480e-04 - val_mae: 0.0146
Epoch 19/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0031 - mae: 0.0322 - val_loss: 2.9074e-04 - val_mae: 0.0117
Epoch 20/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0035 - mae: 0.0317 - val_loss: 6.1603e-04 - val_mae: 0.0211
Epoch 21/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0038 - mae: 0.0354 - val_loss: 0.0014 - val_mae: 0.0321
Epoch 22/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0029 - mae: 0.0318 - val_loss: 2.7690e-04 - val_mae: 0.0128
Epoch 23/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0043 - mae: 0.0372 - val_loss: 6.1362e-04 - val_mae: 0.0195
Epoch 24/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0028 - mae: 0.0317 - val_loss: 6.9217e-04 - val_mae: 0.0207
Epoch 25/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0297 - val_loss: 3.3069e-04 - val_mae: 0.0148
Epoch 26/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0022 - mae: 0.0272 - val_loss: 3.3206e-04 - val_mae: 0.0142
Epoch 27/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0288 - val_loss: 2.3746e-04 - val_mae: 0.0111
Epoch 28/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0026 - mae: 0.0283 - val_loss: 2.8762e-04 - val_mae: 0.0123
Epoch 29/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0274 - val_loss: 2.4683e-04 - val_mae: 0.0111
Epoch 30/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0022 - mae: 0.0272 - val_loss: 2.7630e-04 - val_mae: 0.0121
Epoch 31/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0033 - mae: 0.0321 - val_loss: 4.4869e-04 - val_mae: 0.0180
Epoch 32/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0027 - mae: 0.0301 - val_loss: 2.7686e-04 - val_mae: 0.0130
Epoch 33/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0026 - mae: 0.0293 - val_loss: 2.3408e-04 - val_mae: 0.0109
Epoch 34/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0028 - mae: 0.0310 - val_loss: 7.1281e-04 - val_mae: 0.0242
Epoch 35/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0022 - mae: 0.0280 - val_loss: 3.3877e-04 - val_mae: 0.0145
Epoch 36/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0022 - mae: 0.0293 - val_loss: 2.0179e-04 - val_mae: 0.0105
Epoch 37/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0015 - mae: 0.0252 - val_loss: 1.8896e-04 - val_mae: 0.0105
Epoch 38/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0015 - mae: 0.0219 - val_loss: 8.8271e-04 - val_mae: 0.0263
Epoch 39/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0032 - mae: 0.0310 - val_loss: 2.9015e-04 - val_mae: 0.0126
Epoch 40/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0260 - val_loss: 3.6359e-04 - val_mae: 0.0155
Epoch 41/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0022 - mae: 0.0285 - val_loss: 7.0972e-04 - val_mae: 0.0243
Epoch 42/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0031 - mae: 0.0316 - val_loss: 4.9701e-04 - val_mae: 0.0191
Epoch 43/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0277 - val_loss: 4.1972e-04 - val_mae: 0.0168
Epoch 44/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0026 - mae: 0.0302 - val_loss: 0.0010 - val_mae: 0.0280
Epoch 45/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0322 - val_loss: 3.0194e-04 - val_mae: 0.0124
Epoch 46/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0018 - mae: 0.0275 - val_loss: 4.0251e-04 - val_mae: 0.0175
Epoch 47/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0253 - val_loss: 3.9413e-04 - val_mae: 0.0153
Epoch 48/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0277 - val_loss: 2.8753e-04 - val_mae: 0.0138
Epoch 49/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0285 - val_loss: 1.8861e-04 - val_mae: 0.0105
Epoch 50/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0017 - mae: 0.0247 - val_loss: 6.1732e-04 - val_mae: 0.0224
Epoch 51/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0030 - mae: 0.0318 - val_loss: 3.3542e-04 - val_mae: 0.0154
Epoch 52/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0020 - mae: 0.0278 - val_loss: 2.7674e-04 - val_mae: 0.0128
Epoch 53/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0023 - mae: 0.0272 - val_loss: 1.7684e-04 - val_mae: 0.0101
Epoch 54/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0020 - mae: 0.0263 - val_loss: 3.4157e-04 - val_mae: 0.0137
Epoch 55/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0275 - val_loss: 0.0014 - val_mae: 0.0349
Epoch 56/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0027 - mae: 0.0347 - val_loss: 2.0435e-04 - val_mae: 0.0106
Epoch 57/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0026 - mae: 0.0300 - val_loss: 3.0771e-04 - val_mae: 0.0149
Epoch 58/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0030 - mae: 0.0315 - val_loss: 2.6914e-04 - val_mae: 0.0139
Epoch 59/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0267 - val_loss: 3.0085e-04 - val_mae: 0.0134
Epoch 60/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0020 - mae: 0.0275 - val_loss: 1.5226e-04 - val_mae: 0.0098
Epoch 61/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0310 - val_loss: 1.4427e-04 - val_mae: 0.0085
Epoch 62/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0258 - val_loss: 7.7064e-04 - val_mae: 0.0235
Epoch 63/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0293 - val_loss: 4.6110e-04 - val_mae: 0.0176
Epoch 64/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0015 - mae: 0.0262 - val_loss: 3.5620e-04 - val_mae: 0.0165
Epoch 65/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0254 - val_loss: 0.0010 - val_mae: 0.0295
Epoch 66/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0033 - mae: 0.0333 - val_loss: 0.0015 - val_mae: 0.0356
Epoch 67/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0023 - mae: 0.0338 - val_loss: 4.7742e-04 - val_mae: 0.0187
Epoch 68/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0261 - val_loss: 2.6976e-04 - val_mae: 0.0136
Epoch 69/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0014 - mae: 0.0247 - val_loss: 1.2351e-04 - val_mae: 0.0081
Epoch 70/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0022 - mae: 0.0291 - val_loss: 2.0897e-04 - val_mae: 0.0109
Epoch 71/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0265 - val_loss: 1.5730e-04 - val_mae: 0.0095
Epoch 72/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0018 - mae: 0.0254 - val_loss: 1.5710e-04 - val_mae: 0.0091
Epoch 73/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0016 - mae: 0.0251 - val_loss: 1.0221e-04 - val_mae: 0.0074
Epoch 74/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0263 - val_loss: 1.4163e-04 - val_mae: 0.0093
Epoch 75/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0019 - mae: 0.0266 - val_loss: 1.1923e-04 - val_mae: 0.0080
Epoch 76/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0020 - mae: 0.0273 - val_loss: 1.1192e-04 - val_mae: 0.0084
Epoch 77/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0013 - mae: 0.0252 - val_loss: 0.0013 - val_mae: 0.0323
Epoch 78/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0017 - mae: 0.0274 - val_loss: 7.0122e-04 - val_mae: 0.0217
Epoch 79/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0299 - val_loss: 1.3008e-04 - val_mae: 0.0088
Epoch 80/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0019 - mae: 0.0287 - val_loss: 1.5515e-04 - val_mae: 0.0096
Epoch 81/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0250 - val_loss: 2.4368e-04 - val_mae: 0.0132
Epoch 82/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0267 - val_loss: 5.4898e-04 - val_mae: 0.0207
Epoch 83/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0289 - val_loss: 3.7882e-04 - val_mae: 0.0155
Epoch 84/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0017 - mae: 0.0274 - val_loss: 1.7719e-04 - val_mae: 0.0108
Epoch 85/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0019 - mae: 0.0279 - val_loss: 1.1283e-04 - val_mae: 0.0079
Epoch 86/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0018 - mae: 0.0277 - val_loss: 1.0534e-04 - val_mae: 0.0081
Epoch 87/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0013 - mae: 0.0241 - val_loss: 2.6526e-04 - val_mae: 0.0125
Epoch 88/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0013 - mae: 0.0239 - val_loss: 1.5702e-04 - val_mae: 0.0099
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 93ms/step

============================================================
WAVELET-LSTM MODEL TRAINING SUMMARY
============================================================
Final epochs trained: 88
Best validation loss: 0.0001
Best validation MAE: 0.0074
Lookback period: 52 weeks
Wavelet used: db4 level 3

Model Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ lstm (LSTM)                          │ (None, 192)                 │         148,992 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout (Dropout)                    │ (None, 192)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 16)                  │           3,088 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_1 (Dropout)                  │ (None, 16)                  │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ (None, 1)                   │              17 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 456,293 (1.74 MB)
 Trainable params: 152,097 (594.13 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 304,196 (1.16 MB)
============================================================
EVALUATION ON DENOISED DATA
============================================================
MSE: 8020.6906
RMSE: 89.5583
MAE: 69.3841
MAPE: 0.04%
R²: 0.9505
Directional Accuracy: 83.17%

============================================================
EVALUATION ON ORIGINAL DATA
============================================================
MSE: 19622.4330
RMSE: 140.0801
MAE: 102.9759
MAPE: 0.07%
R²: 0.8907
Directional Accuracy: 37.62%
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Residual Analysis:
Residual mean: 55.2330
Residual std: 128.7313
Residual min: -322.7833
Residual max: 439.5608

==================================================
FUTURE FORECAST (NEXT 12 WEEKS)
==================================================
2024-11-03: 2225.41
2024-11-10: 2106.23
2024-11-17: 1894.65
2024-11-24: 1694.13
2024-12-01: 1568.63
2024-12-08: 1519.14
2024-12-15: 1513.12
2024-12-22: 1516.49
2024-12-29: 1506.74
2025-01-05: 1477.39
2025-01-12: 1433.95
2025-01-19: 1386.47
No description has been provided for this image
In [28]:
# --- Step 11: Detailed Model Configuration Report & JSON Export ---
import json
import datetime
import platform
from tensorflow.keras import backend as K

print("\n" + "="*60)
print("WAVELET + LSTM MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)

report = {}

# Optimizer details
try:
    opt = final_model.optimizer
    opt_name = opt.__class__.__name__
    try:
        lr_val = float(K.get_value(opt.learning_rate))
    except Exception:
        opt_cfg = opt.get_config()
        lr_val = opt_cfg.get('learning_rate', None)
        try:
            lr_val = float(lr_val)
        except Exception:
            pass
    report['optimizer'] = opt_name
    report['learning_rate'] = lr_val
    print(f"Optimizer: {opt_name}")
    print(f"Learning Rate: {lr_val}")
except Exception as e:
    report['optimizer'] = str(e)
    print(f"Could not fetch optimizer details: {e}")

# Hyperparameters (from tuner if available)
try:
    report['best_hyperparameters'] = best_hp.values
    print("\nBest Hyperparameters (from tuner):")
    for k, v in best_hp.values.items():
        print(f"  {k}: {v}")
except Exception as e:
    report['best_hyperparameters'] = None
    print(f"No best hyperparameters found: {e}")

# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
    layer_info = {
        'index': i+1,
        'class_name': layer.__class__.__name__,
        'name': layer.name
    }
    if hasattr(layer, 'units'):
        layer_info['units'] = getattr(layer, 'units', None)
        print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
    else:
        print(f" Layer {i+1}: {layer.__class__.__name__}")
    if hasattr(layer, 'activation'):
        try:
            layer_info['activation'] = layer.activation.__name__
        except Exception:
            layer_info['activation'] = str(layer.activation)
    if hasattr(layer, 'rate'):
        layer_info['dropout_rate'] = getattr(layer, 'rate', None)
    if hasattr(layer, 'return_sequences'):
        layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
    try:
        layer_info['input_shape'] = layer.input_shape
        layer_info['output_shape'] = layer.output_shape
    except:
        layer_info['input_shape'] = None
        layer_info['output_shape'] = None
    layers_report.append(layer_info)

report['layers'] = layers_report

# Training summary
training_summary = {
    'lookback': lookback,
    'epochs_trained': len(history.history['loss']),
    'final_training_loss': float(history.history['loss'][-1]),
    'final_validation_loss': float(min(history.history['val_loss'])),
    'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
    'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
    'wavelet': wavelet,
    'wavelet_level': level
}
report['training_summary'] = training_summary

print("\nTraining Summary:")
for k, v in training_summary.items():
    print(f" {k}: {v}")

# Evaluation
try:
    report['evaluation_metrics'] = {
        'denoised': metrics_denoised,
        'original': metrics_original
    }
    print("\nEvaluation Metrics attached.")
except Exception as e:
    report['evaluation_metrics'] = None
    print(f"Could not attach evaluation metrics: {e}")

# Residual stats
try:
    residuals_stats = {
        'residual_mean': float(np.mean(residuals)),
        'residual_std': float(np.std(residuals)),
        'residual_min': float(np.min(residuals)),
        'residual_max': float(np.max(residuals))
    }
    report['residuals'] = residuals_stats
    print("\nResiduals Summary attached.")
except Exception as e:
    print(f"Residual stats failed: {e}")

# Future forecast (if available)
try:
    forecast_report = {
        'dates': [str(d) for d in future_dates],
        'forecasted_prices': [float(p) for p in future_prices]
    }
    report['future_forecast'] = forecast_report
    print("\nFuture forecast added to report.")
except Exception as e:
    report['future_forecast'] = None
    print(f"Future forecast not added: {e}")

# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__

# Save JSON
report_filename = "wavelet_lstm_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
    json.dump(report, f, indent=2, ensure_ascii=False)

print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================
WAVELET + LSTM MODEL CONFIGURATION & TRAINING REPORT
============================================================
Optimizer: Adam
Learning Rate: 0.0037920912727713585

Best Hyperparameters (from tuner):
  num_layers: 1
  units_0: 192
  dropout_0: 0.4
  dense_layers: 1
  learning_rate: 0.003792091345898107
  dense_units_0: 16
  dense_dropout_0: 0.1

Model Layers:
 Layer 1: LSTM - units: 192
 Layer 2: Dropout
 Layer 3: Dense - units: 16
 Layer 4: Dropout
 Layer 5: Dense - units: 1

Training Summary:
 lookback: 52
 epochs_trained: 88
 final_training_loss: 0.00203199265524745
 final_validation_loss: 0.0001546310231788084
 final_training_mae: 0.028295811265707016
 final_validation_mae: 0.008852318860590458
 wavelet: db4
 wavelet_level: 3

Evaluation Metrics attached.

Residuals Summary attached.

Future forecast added to report.

Saved detailed report to: wavelet_lstm_report.json
============================================================
REPORT COMPLETE
============================================================
In [28]:
# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, denoised_data, label='Wavelet Denoised', linewidth=2)
plt.title("Original vs Wavelet-Denoised Time Series")
plt.legend()
plt.grid()
plt.show()
No description has been provided for this image
In [3]:
# Plot 3: Separate View - Actual vs Predicted
# Plot original vs denoised data
plt.figure(figsize=(12, 6))

plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)

plt.title('Actual vs Predicted (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result17.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image

Wavelet + GRU¶

Import Libraries¶

In [5]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout  # Changed LSTM to GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf

# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

print(f"Original data length: {len(data)}")

# --- Step 1: Wavelet Decomposition ---
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)

# Keep approximation coefficients and first level details for meaningful information
denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]

# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, denoised_data, label='Wavelet Denoised', linewidth=2)
plt.title("Original vs Wavelet-Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 2: Data Preparation for GRU ---
# Use denoised data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(denoised_data.reshape(-1, 1))

# Create sequences for GRU
def create_sequences(data, lookback=52):
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])
        y.append(data[i])
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)

# Reshape for GRU [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 3: Hyperparameter Tuning for GRU ---
def build_model(hp):
    model = Sequential()
    
    # Number of GRU layers (changed from LSTM to GRU)
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(GRU(  # Changed LSTM to GRU
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers with ReLU activation
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

print("\nStarting GRU hyperparameter tuning...")
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=2,
    directory='wavelet_gru_tuning',  # Changed directory name
    project_name='cardamom_wavelet_gru'  # Changed project name
)

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}")  # Changed from LSTM to GRU
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}")  # Changed from LSTM to GRU
    print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")  # Changed from LSTM to GRU

# --- Step 4: Build and Train Final GRU Model ---
final_model = tuner.hypermodel.build(best_hp)

print("\nTraining final GRU model...")  # Changed to GRU
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()

# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_denoised = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()

# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    mape = mean_absolute_percentage_error(actual, forecast)
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
    
    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R²': r2,
        'Directional Accuracy': da
    }

# Evaluate on denoised data
metrics_denoised = evaluate_forecast(y_actual_denoised, y_pred)

# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)

print("\n" + "="*60)
print("WAVELET-GRU MODEL TRAINING SUMMARY")  # Changed to GRU
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Wavelet used: {wavelet} level {level}")
print("\nGRU Model Architecture:")  # Changed to GRU
final_model.summary()

print("\n" + "="*60)
print("EVALUATION ON DENOISED DATA")
print("="*60)
for metric, value in metrics_denoised.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]

plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Wavelet-GRU Training History')  # Changed to GRU
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Denoised vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, denoised_data, label='Wavelet Denoised', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs Wavelet-Denoised vs Forecast (GRU)')  # Added GRU
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)  # Added GRU
plt.fill_between(test_dates, 
                 y_pred - metrics_original['RMSE'], 
                 y_pred + metrics_original['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - GRU Model (Test Period)')  # Added GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result4.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 8: Residual Analysis ---
residuals = y_actual_original - y_pred

plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('GRU Model Residuals Over Time')  # Added GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('GRU Residual Distribution')  # Added GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result6.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('GRU Residuals vs Predicted')  # Added GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result7.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('GRU Residuals vs Actual')  # Added GRU
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result8.png", dpi=300, bbox_inches='tight')
plt.show()

print("\nGRU Residual Analysis:")  # Added GRU
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")

# --- Step 9: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
        forecasts.append(prediction)
        
        # Update sequence
        new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
        current_sequence = new_sequence
    
    # Inverse transform
    forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
    
    # Create future dates
    last_date = df.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS)")  # Added GRU
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
        
    # Plot future forecast
    plt.figure(figsize=(12, 6))
    plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
    plt.plot(future_dates, future_prices, label='GRU Future Forecast', color='red', linestyle='--', linewidth=2)  # Added GRU
    plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
    plt.title('GRU Future Price Forecast (Next 12 Weeks)')  # Added GRU
    plt.xlabel('Date')
    plt.ylabel('Price (Rs./kg)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/wlst_result9.png", dpi=300, bbox_inches='tight')
    plt.show()
    
except Exception as e:
    print(f"Future forecasting failed: {e}")

# --- GRU Benefits Summary ---
print("\n" + "="*60)
print("GRU MODEL ADVANTAGES OVER LSTM")
print("="*60)
print("1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates)")
print("2. Faster Training: Less complex architecture leads to faster training times")
print("3. Better Performance: Often performs better on smaller datasets")
print("4. Reduced Overfitting: Simpler architecture can be less prone to overfitting")
print("5. Memory Efficiency: Uses less memory during training and inference")
print("6. Faster Convergence: Typically converges faster than LSTM")
print("7. Better Gradient Flow: Simpler architecture improves gradient propagation")
Original data length: 722
No description has been provided for this image
Training sequences: (468, 52, 1)
Validation sequences: (100, 52, 1)
Test sequences: (102, 52, 1)

Starting GRU hyperparameter tuning...
Reloading Tuner from wavelet_gru_tuning\cardamom_wavelet_gru\tuner0.json

Best Hyperparameters:
Number of GRU layers: 1
Learning rate: 0.002051386718289359
GRU layer 1 units: 224
GRU layer 1 dropout: 0.4

Training final GRU model...
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 5s 81ms/step - loss: 0.0406 - mae: 0.1411 - val_loss: 6.6561e-04 - val_mae: 0.0173
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0042 - mae: 0.0385 - val_loss: 0.0012 - val_mae: 0.0282
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0039 - mae: 0.0371 - val_loss: 7.1862e-04 - val_mae: 0.0202
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0032 - mae: 0.0283 - val_loss: 4.0779e-04 - val_mae: 0.0146
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0032 - mae: 0.0290 - val_loss: 3.9818e-04 - val_mae: 0.0130
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0028 - mae: 0.0266 - val_loss: 3.1017e-04 - val_mae: 0.0116
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0020 - mae: 0.0252 - val_loss: 4.3022e-04 - val_mae: 0.0165
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0017 - mae: 0.0231 - val_loss: 2.4149e-04 - val_mae: 0.0107
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0016 - mae: 0.0229 - val_loss: 2.0879e-04 - val_mae: 0.0107
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 0.0014 - mae: 0.0220 - val_loss: 4.7442e-04 - val_mae: 0.0182
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0015 - mae: 0.0205 - val_loss: 2.3027e-04 - val_mae: 0.0106
Epoch 12/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0193 - val_loss: 3.3755e-04 - val_mae: 0.0148
Epoch 13/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0194 - val_loss: 2.1890e-04 - val_mae: 0.0126
Epoch 14/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 9.8697e-04 - mae: 0.0228 - val_loss: 4.0378e-04 - val_mae: 0.0161
Epoch 15/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0014 - mae: 0.0215 - val_loss: 1.5815e-04 - val_mae: 0.0095
Epoch 16/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0010 - mae: 0.0188 - val_loss: 1.5241e-04 - val_mae: 0.0084
Epoch 17/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - loss: 0.0011 - mae: 0.0190 - val_loss: 1.4074e-04 - val_mae: 0.0082
Epoch 18/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0010 - mae: 0.0187 - val_loss: 3.3456e-04 - val_mae: 0.0148
Epoch 19/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 9.8860e-04 - mae: 0.0196 - val_loss: 1.6235e-04 - val_mae: 0.0097
Epoch 20/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0192 - val_loss: 1.2785e-04 - val_mae: 0.0080
Epoch 21/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.5907e-04 - mae: 0.0182 - val_loss: 2.7821e-04 - val_mae: 0.0136
Epoch 22/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0013 - mae: 0.0221 - val_loss: 2.2980e-04 - val_mae: 0.0119
Epoch 23/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.3321e-04 - mae: 0.0177 - val_loss: 2.2897e-04 - val_mae: 0.0127
Epoch 24/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.6326e-04 - mae: 0.0171 - val_loss: 1.8956e-04 - val_mae: 0.0103
Epoch 25/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.0895e-04 - mae: 0.0169 - val_loss: 3.4315e-04 - val_mae: 0.0157
Epoch 26/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 9.4998e-04 - mae: 0.0189 - val_loss: 1.5390e-04 - val_mae: 0.0087
Epoch 27/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 8.7986e-04 - mae: 0.0175 - val_loss: 1.2559e-04 - val_mae: 0.0081
Epoch 28/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.1703e-04 - mae: 0.0157 - val_loss: 1.1656e-04 - val_mae: 0.0074
Epoch 29/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.9170e-04 - mae: 0.0159 - val_loss: 3.6592e-04 - val_mae: 0.0168
Epoch 30/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.6841e-04 - mae: 0.0175 - val_loss: 1.4055e-04 - val_mae: 0.0097
Epoch 31/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0012 - mae: 0.0193 - val_loss: 1.5999e-04 - val_mae: 0.0099
Epoch 32/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.1401e-04 - mae: 0.0175 - val_loss: 1.4100e-04 - val_mae: 0.0098
Epoch 33/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0010 - mae: 0.0189 - val_loss: 2.7553e-04 - val_mae: 0.0145
Epoch 34/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.0730e-04 - mae: 0.0183 - val_loss: 1.3814e-04 - val_mae: 0.0083
Epoch 35/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0213 - val_loss: 1.7513e-04 - val_mae: 0.0113
Epoch 36/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.6740e-04 - mae: 0.0216 - val_loss: 1.3451e-04 - val_mae: 0.0096
Epoch 37/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.4420e-04 - mae: 0.0173 - val_loss: 9.7174e-05 - val_mae: 0.0077
Epoch 38/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.9973e-04 - mae: 0.0190 - val_loss: 1.2269e-04 - val_mae: 0.0092
Epoch 39/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.4614e-04 - mae: 0.0165 - val_loss: 8.1454e-05 - val_mae: 0.0066
Epoch 40/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 7.5813e-04 - mae: 0.0154 - val_loss: 8.6391e-05 - val_mae: 0.0066
Epoch 41/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 8.2527e-04 - mae: 0.0173 - val_loss: 1.5650e-04 - val_mae: 0.0106
Epoch 42/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.0442e-04 - mae: 0.0175 - val_loss: 6.6731e-05 - val_mae: 0.0056
Epoch 43/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.5386e-04 - mae: 0.0154 - val_loss: 2.5717e-04 - val_mae: 0.0135
Epoch 44/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.9037e-04 - mae: 0.0177 - val_loss: 8.4732e-05 - val_mae: 0.0067
Epoch 45/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.3333e-04 - mae: 0.0165 - val_loss: 1.2596e-04 - val_mae: 0.0085
Epoch 46/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.2000e-04 - mae: 0.0142 - val_loss: 1.7775e-04 - val_mae: 0.0118
Epoch 47/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 8.0606e-04 - mae: 0.0164 - val_loss: 1.2970e-04 - val_mae: 0.0097
Epoch 48/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.4555e-04 - mae: 0.0141 - val_loss: 1.0925e-04 - val_mae: 0.0089
Epoch 49/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.3674e-04 - mae: 0.0166 - val_loss: 6.4746e-05 - val_mae: 0.0057
Epoch 50/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.3739e-04 - mae: 0.0175 - val_loss: 8.6348e-05 - val_mae: 0.0071
Epoch 51/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.1029e-04 - mae: 0.0151 - val_loss: 1.7950e-04 - val_mae: 0.0117
Epoch 52/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0011 - mae: 0.0197 - val_loss: 3.9751e-04 - val_mae: 0.0182
Epoch 53/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.3818e-04 - mae: 0.0205 - val_loss: 5.6425e-05 - val_mae: 0.0052
Epoch 54/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.1495e-04 - mae: 0.0180 - val_loss: 7.5006e-05 - val_mae: 0.0070
Epoch 55/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.9230e-04 - mae: 0.0151 - val_loss: 1.7485e-04 - val_mae: 0.0087
Epoch 56/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.1987e-04 - mae: 0.0149 - val_loss: 8.8326e-05 - val_mae: 0.0076
Epoch 57/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 5.3549e-04 - mae: 0.0136 - val_loss: 7.0879e-05 - val_mae: 0.0057
Epoch 58/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.2342e-04 - mae: 0.0139 - val_loss: 3.3876e-04 - val_mae: 0.0164
Epoch 59/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.4244e-04 - mae: 0.0198 - val_loss: 2.4646e-04 - val_mae: 0.0141
Epoch 60/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 8.2960e-04 - mae: 0.0169 - val_loss: 1.1223e-04 - val_mae: 0.0095
Epoch 61/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.8341e-04 - mae: 0.0164 - val_loss: 6.6242e-05 - val_mae: 0.0053
Epoch 62/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.1215e-04 - mae: 0.0138 - val_loss: 1.6948e-04 - val_mae: 0.0113
Epoch 63/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.7228e-04 - mae: 0.0169 - val_loss: 1.9953e-04 - val_mae: 0.0109
Epoch 64/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.2848e-04 - mae: 0.0156 - val_loss: 1.0757e-04 - val_mae: 0.0087
Epoch 65/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 5.9780e-04 - mae: 0.0149 - val_loss: 3.3949e-04 - val_mae: 0.0171
Epoch 66/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.5201e-04 - mae: 0.0142 - val_loss: 5.9108e-05 - val_mae: 0.0054
Epoch 67/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.9726e-04 - mae: 0.0153 - val_loss: 5.4804e-05 - val_mae: 0.0052
Epoch 68/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 4.8245e-04 - mae: 0.0132 - val_loss: 4.9300e-05 - val_mae: 0.0049
Epoch 69/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - loss: 7.5551e-04 - mae: 0.0158 - val_loss: 7.8496e-05 - val_mae: 0.0071
Epoch 70/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 58ms/step - loss: 5.9315e-04 - mae: 0.0149 - val_loss: 2.4533e-04 - val_mae: 0.0144
Epoch 71/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 55ms/step - loss: 7.0124e-04 - mae: 0.0162 - val_loss: 2.0084e-04 - val_mae: 0.0113
Epoch 72/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 53ms/step - loss: 5.0515e-04 - mae: 0.0129 - val_loss: 6.2962e-05 - val_mae: 0.0056
Epoch 73/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.2517e-04 - mae: 0.0142 - val_loss: 4.6976e-05 - val_mae: 0.0047
Epoch 74/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.4673e-04 - mae: 0.0144 - val_loss: 5.9757e-05 - val_mae: 0.0058
Epoch 75/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 6.5002e-04 - mae: 0.0150 - val_loss: 7.1378e-05 - val_mae: 0.0066
Epoch 76/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.6939e-04 - mae: 0.0202 - val_loss: 4.3667e-04 - val_mae: 0.0194
Epoch 77/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.9345e-04 - mae: 0.0185 - val_loss: 6.8965e-05 - val_mae: 0.0063
Epoch 78/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 5.9977e-04 - mae: 0.0167 - val_loss: 7.0341e-05 - val_mae: 0.0060
Epoch 79/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 5.3396e-04 - mae: 0.0134 - val_loss: 6.6781e-05 - val_mae: 0.0064
Epoch 80/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.8462e-04 - mae: 0.0165 - val_loss: 1.0822e-04 - val_mae: 0.0086
Epoch 81/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 5.0874e-04 - mae: 0.0144 - val_loss: 2.3464e-04 - val_mae: 0.0137
Epoch 82/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.2562e-04 - mae: 0.0149 - val_loss: 5.8219e-05 - val_mae: 0.0064
Epoch 83/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.5684e-04 - mae: 0.0163 - val_loss: 1.1107e-04 - val_mae: 0.0089
Epoch 84/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 7.2826e-04 - mae: 0.0177 - val_loss: 1.6702e-04 - val_mae: 0.0097
Epoch 85/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.6232e-04 - mae: 0.0156 - val_loss: 6.8336e-05 - val_mae: 0.0054
Epoch 86/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.9771e-04 - mae: 0.0152 - val_loss: 8.6466e-05 - val_mae: 0.0064
Epoch 87/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.5203e-04 - mae: 0.0169 - val_loss: 5.1111e-05 - val_mae: 0.0047
Epoch 88/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 4.6553e-04 - mae: 0.0142 - val_loss: 2.0932e-04 - val_mae: 0.0125
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 111ms/step

============================================================
WAVELET-GRU MODEL TRAINING SUMMARY
============================================================
Final epochs trained: 88
Best validation loss: 0.0000
Best validation MAE: 0.0047
Lookback period: 52 weeks
Wavelet used: db4 level 3

GRU Model Architecture:
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ gru (GRU)                            │ (None, 224)                 │         152,544 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_2 (Dropout)                  │ (None, 224)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_2 (Dense)                      │ (None, 1)                   │             225 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 458,309 (1.75 MB)
 Trainable params: 152,769 (596.75 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 305,540 (1.17 MB)
============================================================
EVALUATION ON DENOISED DATA
============================================================
MSE: 4140.1010
RMSE: 64.3436
MAE: 48.9869
MAPE: 0.03%
R²: 0.9744
Directional Accuracy: 83.17%

============================================================
EVALUATION ON ORIGINAL DATA
============================================================
MSE: 16242.7976
RMSE: 127.4472
MAE: 93.1135
MAPE: 0.06%
R²: 0.9095
Directional Accuracy: 38.61%
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
GRU Residual Analysis:
Residual mean: 32.9018
Residual std: 123.1271
Residual min: -388.7794
Residual max: 434.9351

==================================================
FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS)
==================================================
2024-11-03: 2306.12
2024-11-10: 2252.24
2024-11-17: 2101.93
2024-11-24: 1919.64
2024-12-01: 1756.78
2024-12-08: 1656.57
2024-12-15: 1622.91
2024-12-22: 1627.00
2024-12-29: 1631.12
2025-01-05: 1611.09
2025-01-12: 1563.73
2025-01-19: 1501.98
No description has been provided for this image
============================================================
GRU MODEL ADVANTAGES OVER LSTM
============================================================
1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates)
2. Faster Training: Less complex architecture leads to faster training times
3. Better Performance: Often performs better on smaller datasets
4. Reduced Overfitting: Simpler architecture can be less prone to overfitting
5. Memory Efficiency: Uses less memory during training and inference
6. Faster Convergence: Typically converges faster than LSTM
7. Better Gradient Flow: Simpler architecture improves gradient propagation
In [32]:
# --- Step 10: Detailed Model Configuration Report & JSON Export ---
import json
import datetime
import platform
from tensorflow.keras import backend as K

print("\n" + "="*60)
print("WAVELET + GRU MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)

report = {}

# Optimizer details
try:
    opt = final_model.optimizer
    opt_name = opt.__class__.__name__
    try:
        lr_val = float(K.get_value(opt.learning_rate))
    except Exception:
        opt_cfg = opt.get_config()
        lr_val = opt_cfg.get('learning_rate', None)
        try:
            lr_val = float(lr_val)
        except Exception:
            pass
    report['optimizer'] = opt_name
    report['learning_rate'] = lr_val
    print(f"Optimizer: {opt_name}")
    print(f"Learning Rate: {lr_val}")
except Exception as e:
    report['optimizer'] = str(e)
    print(f"Could not fetch optimizer details: {e}")

# Hyperparameters (from tuner if available)
try:
    report['best_hyperparameters'] = best_hp.values
    print("\nBest Hyperparameters (from tuner):")
    for k, v in best_hp.values.items():
        print(f"  {k}: {v}")
except Exception as e:
    report['best_hyperparameters'] = None
    print(f"No best hyperparameters found: {e}")

# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
    layer_info = {
        'index': i+1,
        'class_name': layer.__class__.__name__,
        'name': layer.name
    }
    if hasattr(layer, 'units'):
        layer_info['units'] = getattr(layer, 'units', None)
        print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
    else:
        print(f" Layer {i+1}: {layer.__class__.__name__}")
    if hasattr(layer, 'activation'):
        try:
            layer_info['activation'] = layer.activation.__name__
        except Exception:
            layer_info['activation'] = str(layer.activation)
    if hasattr(layer, 'rate'):
        layer_info['dropout_rate'] = getattr(layer, 'rate', None)
    if hasattr(layer, 'return_sequences'):
        layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
    try:
        layer_info['input_shape'] = layer.input_shape
        layer_info['output_shape'] = layer.output_shape
    except:
        layer_info['input_shape'] = None
        layer_info['output_shape'] = None
    layers_report.append(layer_info)

report['layers'] = layers_report

# Training summary
training_summary = {
    'lookback': lookback,
    'epochs_trained': len(history.history['loss']),
    'final_training_loss': float(history.history['loss'][-1]),
    'final_validation_loss': float(min(history.history['val_loss'])),
    'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
    'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
    'wavelet': wavelet,
    'wavelet_level': level
}
report['training_summary'] = training_summary

print("\nTraining Summary:")
for k, v in training_summary.items():
    print(f" {k}: {v}")

# Evaluation
try:
    report['evaluation_metrics'] = {
        'denoised': metrics_denoised,
        'original': metrics_original
    }
    print("\nEvaluation Metrics attached.")
except Exception as e:
    report['evaluation_metrics'] = None
    print(f"Could not attach evaluation metrics: {e}")

# Residual stats
try:
    residuals_stats = {
        'residual_mean': float(np.mean(residuals)),
        'residual_std': float(np.std(residuals)),
        'residual_min': float(np.min(residuals)),
        'residual_max': float(np.max(residuals))
    }
    report['residuals'] = residuals_stats
    print("\nResiduals Summary attached.")
except Exception as e:
    print(f"Residual stats failed: {e}")

# Future forecast (if available)
try:
    forecast_report = {
        'dates': [str(d) for d in future_dates],
        'forecasted_prices': [float(p) for p in future_prices]
    }
    report['future_forecast'] = forecast_report
    print("\nFuture forecast added to report.")
except Exception as e:
    report['future_forecast'] = None
    print(f"Future forecast not added: {e}")

# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__

# Save JSON
report_filename = "wavelet_gru_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
    json.dump(report, f, indent=2, ensure_ascii=False)

print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================
WAVELET + GRU MODEL CONFIGURATION & TRAINING REPORT
============================================================
Optimizer: Adam
Learning Rate: 0.0020513867493718863

Best Hyperparameters (from tuner):
  num_layers: 1
  units_0: 224
  dropout_0: 0.4
  dense_layers: 0
  learning_rate: 0.002051386718289359
  units_1: 96
  dropout_1: 0.4
  units_2: 160
  dropout_2: 0.30000000000000004
  dense_units_0: 112
  dense_dropout_0: 0.2
  dense_units_1: 112
  dense_dropout_1: 0.2

Model Layers:
 Layer 1: GRU - units: 224
 Layer 2: Dropout
 Layer 3: Dense - units: 1

Training Summary:
 lookback: 52
 epochs_trained: 82
 final_training_loss: 0.0003886503691319376
 final_validation_loss: 4.182316843071021e-05
 final_training_mae: 0.012498756870627403
 final_validation_mae: 0.004680353216826916
 wavelet: db4
 wavelet_level: 3

Evaluation Metrics attached.

Residuals Summary attached.

Future forecast added to report.

Saved detailed report to: wavelet_gru_report.json
============================================================
REPORT COMPLETE
============================================================
In [33]:
# --- Step 9: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("GRU MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)

# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")

# Model architecture details
for i, layer in enumerate(final_model.layers):
    print(f"\nLayer {i+1}: {layer.__class__.__name__}")
    try:
        print(f"  Units: {layer.units}")
    except:
        pass
    try:
        print(f"  Activation: {layer.activation.__name__}")
    except:
        pass
    try:
        print(f"  Dropout Rate: {layer.rate}")
    except:
        pass
    if hasattr(layer, "return_sequences"):
        print(f"  Return Sequences: {layer.return_sequences}")

# Training summary
print("\nTraining Details:")
print(f"Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")


print("\n" + "="*60)
print("NOTE: The above configuration includes optimizer, activation functions, "
      "learning rate, and automatic layer details for full reproducibility.")
print("="*60)
============================================================
GRU MODEL CONFIGURATION & TRAINING DETAILS
============================================================
Optimizer: Adam
Learning Rate: 0.0020513867493718863

Layer 1: GRU
  Units: 224
  Activation: tanh
  Return Sequences: False

Layer 2: Dropout
  Dropout Rate: 0.4

Layer 3: Dense
  Units: 1
  Activation: linear

Training Details:
Epochs Trained: 82
Final Training Loss: 0.0004
Final Validation Loss: 0.0001
Final Training MAE: 0.0125
Final Validation MAE: 0.0055

============================================================
NOTE: The above configuration includes optimizer, activation functions, learning rate, and automatic layer details for full reproducibility.
============================================================
In [7]:
# Plot 3: Separate View - Actual vs Predicted

plt.figure(figsize=(12, 8))


plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)  # Added GRU

plt.title('Actual vs Predicted - GRU Model (Test Period)')  # Added GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result19.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [11]:
import pywt

# Assume 'data' is your time series (e.g., prices)
# Perform 3-level Haar wavelet decomposition
coeffs_original = pywt.wavedec(data, 'haar', level=3)  # data should be your time series

labels = ['a3', 'd3', 'd2', 'd1']
plt.figure(figsize=(12, 8))
for i, c in enumerate(coeffs_original):
    plt.subplot(len(coeffs_original), 1, i + 1)
    plt.plot(c, label=labels[i])
    plt.legend(loc='upper right')
    plt.grid(True)
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/wlst_result119.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image

Empirical Mode Decomposition¶

EMD + ARIMA¶

In [3]:
import warnings
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # Import matplotlib.pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
warnings.filterwarnings("ignore")
In [142]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [143]:
df.head()
Out[143]:
State Name District Name Market Name Variety Group Arrivals (Tonnes) Min Price (Rs./Quintal) Max Price (Rs./Quintal) Modal Price (Rs./Quintal) Date
0 Kerala Idukki Nedumkandam Green Medium Spices 13.0 1500 1700 1650 2010-06-08
1 Kerala Idukki Nedumkandam Green Medium Spices 11.0 1300 1750 1500 2010-06-13
2 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1400 1800 1600 2010-06-20
3 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1300 1800 1650 2010-06-27
4 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1400 1850 1600 2010-07-11
In [144]:
df[" Date"] = pd.to_datetime(df[" Date"])
df.set_index(" Date", inplace=True)
In [145]:
df['Modal Price (Rs./kg)']=df['Modal Price (Rs./Quintal)']/100
In [148]:
data = df['Modal Price (Rs./kg)'].values
In [149]:
df.head()
Out[149]:
State Name District Name Market Name Variety Group Arrivals (Tonnes) Min Price (Rs./Quintal) Max Price (Rs./Quintal) Modal Price (Rs./Quintal) Modal Price (Rs./kg)
Date
2010-06-08 Kerala Idukki Nedumkandam Green Medium Spices 13.0 1500 1700 1650 16.5
2010-06-13 Kerala Idukki Nedumkandam Green Medium Spices 11.0 1300 1750 1500 15.0
2010-06-20 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1400 1800 1600 16.0
2010-06-27 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1300 1800 1650 16.5
2010-07-11 Kerala Idukki Nedumkandam Green Medium Spices 16.5 1400 1850 1600 16.0

Plot original data¶

In [151]:
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Cardamom Price') # Specify x and y data
plt.title('Weekly Cardamom Price Time Series')
plt.xlabel('Date')
plt.ylabel('price')
plt.legend()
plt.grid()
plt.show()
No description has been provided for this image

=== Step 2: Apply EMD ===¶

In [156]:
from PyEMD import EMD

from PyEMD import EMD
emd = EMD()
imfs = emd.emd(data)
selected_imfs = imfs[3:6] 

=== Step 3: Visualize IMFs ===¶

In [160]:
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r')
plt.title("Original Time Series")
plt.grid()

for i, imf in enumerate(imfs):
    plt.subplot(len(imfs)+1, 1, i+2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i+1}")
    plt.grid()

plt.tight_layout()
plt.show()
No description has been provided for this image

=== Step 4: Train/Val/Test Split ===¶

In [163]:
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

train_val_data = data[:train_size + val_size]
test_data = data[train_size + val_size:]
In [164]:
# Plot the data split without inverse transformation
plt.figure(figsize=(12, 5))
plt.plot(np.arange(total_size), data, label='Full Data')
plt.axvspan(0, train_end, color='green', alpha=0.2, label='Train')
plt.axvspan(train_end, val_end, color='orange', alpha=0.2, label='Validation')
plt.axvspan(val_end, total_size, color='red', alpha=0.2, label='Test')
plt.title("Train, Validation, and Test Splits")
plt.xlabel("Time Steps")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image

=== Step 5: Time Series CV + ARIMA per IMF ===¶

In [167]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import numpy as np
In [176]:
# Define ARIMA hyperparameter ranges
p_values = [0, 1, 2,3,4,5,6]
d_values = [0]
q_values = [0, 1, 2,3,4]

# Now define the parameter grid
param_grid = {
    'p': p_values,
    'd': d_values,
    'q': q_values
}
In [178]:
imf_predictions = []
best_params_summary = []
In [180]:
selected_imfs = imfs[3:6]

Tuning ARIMA for each selected IMF¶

In [ ]:
for imf_index, imf in enumerate(selected_imfs, start=3):
    print(f"🔍 Tuning ARIMA for IMF {imf_index}")
    best_score = float('inf')
    best_params = None
    best_forecast = None

    tscv = TimeSeriesSplit(n_splits=3)

    for params in ParameterGrid(param_grid):
        fold_losses = []

        for train_idx, val_idx in tscv.split(imf):
            train_series = imf[train_idx]
            val_series = imf[val_idx]

            try:
                model = SARIMAX(
                    train_series,
                    order=(params['p'], params['d'], params['q']),
                    enforce_stationarity=False,
                    enforce_invertibility=False
                )
                model_fit = model.fit(disp=False)

                val_forecast = model_fit.forecast(steps=len(val_series))
                score = mean_squared_error(val_series, val_forecast)
                fold_losses.append(score)
            except:
                continue  # skip bad models

        if fold_losses:
            avg_loss = np.mean(fold_losses)

            if avg_loss < best_score:
                best_score = avg_loss
                best_params = params

                try:
                    full_model = SARIMAX(
                        imf,
                        order=(params['p'], params['d'], params['q']),
                        enforce_stationarity=False,
                        enforce_invertibility=False
                    )
                    full_model_fit = full_model.fit(disp=False)
                    forecast = full_model_fit.forecast(steps=test_size)  # test_size must be defined
                    best_forecast = forecast
                except:
                    continue

    print(f"✅ Best ARIMA params for IMF {imf_index}: {best_params} with MSE={best_score:.4f}")
    best_params_summary.append({
        "IMF": imf_index,
        "p": best_params['p'],
        "d": best_params['d'],
        "q": best_params['q'],
        "Validation MSE": best_score
    })
    imf_predictions.append(best_forecast)
🔍 Tuning ARIMA for IMF 3

Final refitting on train+val and forecasting on test using best ARIMA parameters¶

In [ ]:
full_model_summaries = []

for imf, summary in zip(selected_imfs, best_params_summary):
    imf_index = summary["IMF"]
    params = summary

    # Separate train and test
    train_data = imf[:train_size + val_size]
    test_data = imf[train_size + val_size:]

    model = SARIMAX(
        train_data,
        order=(params['p'], params['d'], params['q']),
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    model_fit = model.fit(disp=False)

    forecast = model_fit.forecast(steps=len(test_data))
    full_model_summaries.append({
        "IMF": imf_index,
        "Test MSE": mean_squared_error(test_data, forecast)
    })

Combine forecasts from selected IMFs¶

In [ ]:
# Reconstruct forecast
reconstructed_forecast = np.sum(imf_predictions, axis=0)

=== Step 9: Evaluation ===¶

In [ ]:
actual = df['Modal Price (Rs./kg)'].values[train_size + val_size:]
forecast_index = df.index[train_size + val_size:]
In [ ]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Ensure arrays are numpy-compatible and flattened
actual = np.asarray(actual).flatten()
reconstructed_forecast = np.asarray(reconstructed_forecast).flatten()

# Evaluation Metrics
final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast))
final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100
final_mae = mean_absolute_error(actual, reconstructed_forecast)
final_r2 = r2_score(actual, reconstructed_forecast)

# Directional Accuracy (DA)
if len(actual) > 1 and len(reconstructed_forecast) > 1:
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(reconstructed_forecast))

    if len(actual_diff) == len(forecast_diff):
        final_da = np.mean(actual_diff == forecast_diff) * 100
    else:
        final_da = np.nan
else:
    final_da = np.nan

# Print Results
print(f"\n🎯 Final Reconstructed Forecast Evaluation:")
print(f"RMSE: {final_rmse:.2f}")
print(f"MAPE: {final_mape:.2f}%")
print(f"MAE: {final_mae:.2f}")
print(f"R²: {final_r2:.4f}")
print(f"Directional Accuracy: {final_da:.2f}%" if not np.isnan(final_da) else "Directional Accuracy: N/A (insufficient data)")
In [ ]:
 

=== Step 10: Plotting Forecast vs Actual ===¶

In [159]:
# After your existing code up to the final plot, replace with this:

# Get the actual date indices for each segment
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:]

plt.figure(figsize=(14, 6))

# Plot all segments with proper date indexing
plt.plot(train_dates, data[:train_size], label="Train", color='green')
plt.plot(val_dates, data[train_size:train_size+val_size], label="Validation", color='orange')
plt.plot(test_dates, data[train_size+val_size:], label="Test (Actual)", color='red')

# Connect the last validation point to first forecast point
plt.plot([val_dates[-1], test_dates[0]], 
         [data[train_size+val_size-1], reconstructed_forecast[0]], 
         '--', color='blue', alpha=0.3)  # Connection line

# Plot the forecast
plt.plot(test_dates, reconstructed_forecast, label="Forecast (EMD+SARIMA)", linestyle='--', color='blue')

plt.title(f"Cardamom Price Forecast (EMD+SARIMA)\nRMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()

# Improve x-axis date formatting
plt.gca().xaxis.set_major_locator(plt.MaxNLocator(10))  # Reduce number of x-ticks
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [160]:
plt.plot(test_dates, data[train_size+val_size:], label="Test (Actual)", color='red')

# Connect the last validation point to first forecast point
plt.plot([val_dates[-1], test_dates[0]], 
         [data[train_size+val_size-1], reconstructed_forecast[0]], 
         '--', color='blue', alpha=0.3)  # Connection line

# Plot the forecast
plt.plot(test_dates, reconstructed_forecast, label="Forecast (EMD+SARIMA)", linestyle='--', color='blue')
Out[160]:
[<matplotlib.lines.Line2D at 0x1faae9278f0>]
No description has been provided for this image
In [3]:
import warnings
import numpy as np
import pandas as pd

import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from PyEMD import EMD
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # Import matplotlib.pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
warnings.filterwarnings("ignore")
# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

# EMD Decomposition
emd = EMD()
imfs = emd.emd(data)

# Plot IMFs
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r')
plt.title("Original Time Series")
plt.grid()

for i, imf in enumerate(imfs):
    plt.subplot(len(imfs)+1, 1, i+2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i+1}")
    plt.grid()

plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:]

# Hyperparameter tuning for ARIMA
param_grid = {
    'p': range(0, 7),
    'd': range(0,1),
    'q': range(0,7)
}

best_arima_params = {}
best_imf_models = {}

# Train ARIMA on each IMF
for i, imf in enumerate(imfs):
    print(f"\nTraining ARIMA for IMF {i+1}")
    
    best_score = float('inf')
    best_params = None
    best_model = None
    
    for params in ParameterGrid(param_grid):
        try:
            model = ARIMA(imf[:train_size], order=(params['p'], params['d'], params['q']))
            model_fit = model.fit()
            val_pred = model_fit.forecast(steps=len(val_data))
            score = mean_squared_error(imf[train_size:train_size+val_size], val_pred)
            
            if score < best_score:
                best_score = score
                best_params = params
                best_model = model_fit
                
        except:
            continue
    
    best_arima_params[f'IMF_{i+1}'] = best_params
    best_imf_models[f'IMF_{i+1}'] = best_model
    print(f"Best params for IMF {i+1}: {best_params} with MSE: {best_score:.4f}")

# Reconstruct and evaluate on test set
test_predictions = np.zeros(len(test_data))

for i, (imf_name, model) in enumerate(best_imf_models.items()):
    imf_pred = model.forecast(steps=len(test_data))
    test_predictions += imf_pred

# Evaluation metrics
mse = mean_squared_error(test_data, test_predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(test_data, test_predictions)
mape = mean_absolute_percentage_error(test_data, test_predictions)
r2 = r2_score(test_data, test_predictions)

print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}")
print(f"R²: {r2:.4f}")

# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("EMD-ARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Model summary for each IMF
for imf_name, model in best_imf_models.items():
    print(f"\n{imf_name} ARIMA Model Summary:")
    print(model.summary())
    
    # Plot diagnostics
    model.plot_diagnostics(figsize=(12, 8))
    plt.suptitle(f"{imf_name} ARIMA Diagnostics", y=1.02)
    plt.tight_layout()
    plt.tight_layout()
    plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
    plt.savefig("C:/Users/marti/Desktop/png/eAR_result3.png", dpi=300, bbox_inches='tight')
    plt.show()
No description has been provided for this image
Training ARIMA for IMF 1
Best params for IMF 1: {'d': 0, 'p': 1, 'q': 5} with MSE: 1750.0254

Training ARIMA for IMF 2
Best params for IMF 2: {'d': 0, 'p': 3, 'q': 0} with MSE: 3165.6733

Training ARIMA for IMF 3
Best params for IMF 3: {'d': 0, 'p': 2, 'q': 3} with MSE: 6727.5757

Training ARIMA for IMF 4
Best params for IMF 4: {'d': 0, 'p': 6, 'q': 2} with MSE: 10721.9262

Training ARIMA for IMF 5
Best params for IMF 5: {'d': 0, 'p': 4, 'q': 6} with MSE: 168133.2143

Training ARIMA for IMF 6
Best params for IMF 6: {'d': 0, 'p': 6, 'q': 5} with MSE: 62.9625

Final Evaluation Metrics:
MSE: 209150.1327
RMSE: 457.3293
MAE: 394.7422
MAPE: 0.2738
R²: -0.0522
No description has been provided for this image
IMF_1 ARIMA Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(1, 0, 5)   Log Likelihood               -3093.977
Date:                Thu, 06 Nov 2025   AIC                           6203.954
Time:                        22:21:58   BIC                           6237.750
Sample:                             0   HQIC                          6217.210
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.8959     15.708      0.057      0.955     -29.891      31.683
ar.L1          0.5761      0.086      6.703      0.000       0.408       0.745
ma.L1         -0.4222      0.088     -4.821      0.000      -0.594      -0.251
ma.L2          0.1689      0.023      7.403      0.000       0.124       0.214
ma.L3          0.0302      0.032      0.957      0.339      -0.032       0.092
ma.L4          0.1166      0.039      2.957      0.003       0.039       0.194
ma.L5          0.1655      0.054      3.072      0.002       0.060       0.271
sigma2      1.226e+04    344.827     35.559      0.000    1.16e+04    1.29e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.13   Jarque-Bera (JB):              7125.23
Prob(Q):                              0.72   Prob(JB):                         0.00
Heteroskedasticity (H):              20.79   Skew:                            -1.21
Prob(H) (two-sided):                  0.00   Kurtosis:                        21.24
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
IMF_2 ARIMA Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(3, 0, 0)   Log Likelihood               -2733.353
Date:                Thu, 06 Nov 2025   AIC                           5476.706
Time:                        22:22:02   BIC                           5497.829
Sample:                             0   HQIC                          5484.991
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         11.7618    100.250      0.117      0.907    -184.725     208.248
ar.L1          2.0483      0.012    164.464      0.000       2.024       2.073
ar.L2         -1.7123      0.021    -81.475      0.000      -1.753      -1.671
ar.L3          0.6377      0.013     49.607      0.000       0.613       0.663
sigma2      2912.5351     66.936     43.512      0.000    2781.343    3043.728
===================================================================================
Ljung-Box (L1) (Q):                  91.23   Jarque-Bera (JB):             29580.25
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):              50.33   Skew:                             1.43
Prob(H) (two-sided):                  0.00   Kurtosis:                        40.38
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
IMF_3 ARIMA Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(2, 0, 3)   Log Likelihood               -1638.131
Date:                Thu, 06 Nov 2025   AIC                           3290.262
Time:                        22:22:07   BIC                           3319.834
Sample:                             0   HQIC                          3301.862
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.0279     14.706      0.342      0.732     -23.795      33.851
ar.L1          1.7983      0.006    310.468      0.000       1.787       1.810
ar.L2         -0.8979      0.006   -149.878      0.000      -0.910      -0.886
ma.L1          1.8303      0.016    113.065      0.000       1.799       1.862
ma.L2          1.4365      0.025     56.927      0.000       1.387       1.486
ma.L3          0.4791      0.015     32.146      0.000       0.450       0.508
sigma2        37.3411      0.893     41.817      0.000      35.591      39.091
===================================================================================
Ljung-Box (L1) (Q):                  22.13   Jarque-Bera (JB):             27653.31
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):             204.27   Skew:                            -0.46
Prob(H) (two-sided):                  0.00   Kurtosis:                        39.24
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
IMF_4 ARIMA Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(6, 0, 2)   Log Likelihood                 429.574
Date:                Thu, 06 Nov 2025   AIC                           -839.149
Time:                        22:22:10   BIC                           -796.903
Sample:                             0   HQIC                          -822.578
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         16.3807     43.958      0.373      0.709     -69.776     102.537
ar.L1          3.3543      0.490      6.842      0.000       2.393       4.315
ar.L2         -3.5991      1.770     -2.033      0.042      -7.068      -0.130
ar.L3          0.4667      2.311      0.202      0.840      -4.063       4.996
ar.L4          1.5751      1.227      1.284      0.199      -0.829       3.979
ar.L5         -0.9290      0.324     -2.870      0.004      -1.563      -0.295
ar.L6          0.1318      0.133      0.990      0.322      -0.129       0.393
ma.L1          1.0005      0.503      1.989      0.047       0.015       1.986
ma.L2          0.1576      0.342      0.460      0.645      -0.513       0.828
sigma2         0.0099      0.000     31.044      0.000       0.009       0.011
===================================================================================
Ljung-Box (L1) (Q):                   0.05   Jarque-Bera (JB):             46089.69
Prob(Q):                              0.81   Prob(JB):                         0.00
Heteroskedasticity (H):               5.69   Skew:                            -1.09
Prob(H) (two-sided):                  0.00   Kurtosis:                        49.75
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
IMF_5 ARIMA Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(4, 0, 6)   Log Likelihood                2215.512
Date:                Thu, 06 Nov 2025   AIC                          -4407.024
Time:                        22:22:15   BIC                          -4356.329
Sample:                             0   HQIC                         -4387.140
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         33.1840      0.000   1.08e+05      0.000      33.183      33.185
ar.L1          3.9437      0.000   1.06e+04      0.000       3.943       3.944
ar.L2         -5.8380      0.002  -2854.519      0.000      -5.842      -5.834
ar.L3          3.8449      0.003   1294.961      0.000       3.839       3.851
ar.L4         -0.9506      0.001   -732.655      0.000      -0.953      -0.948
ma.L1          0.5451      0.021     25.382      0.000       0.503       0.587
ma.L2          0.0285      0.013      2.196      0.028       0.003       0.054
ma.L3         -0.0949      0.020     -4.721      0.000      -0.134      -0.055
ma.L4          0.1659      0.027      6.226      0.000       0.114       0.218
ma.L5         -0.0416      0.012     -3.497      0.000      -0.065      -0.018
ma.L6         -0.1280      0.028     -4.561      0.000      -0.183      -0.073
sigma2      8.068e-06   2.28e-07     35.362      0.000    7.62e-06    8.51e-06
===================================================================================
Ljung-Box (L1) (Q):                   0.09   Jarque-Bera (JB):             10712.40
Prob(Q):                              0.76   Prob(JB):                         0.00
Heteroskedasticity (H):               0.36   Skew:                            -0.71
Prob(H) (two-sided):                  0.00   Kurtosis:                        25.52
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 3.42e+17. Standard errors may be unstable.
No description has been provided for this image
IMF_6 ARIMA Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(6, 0, 5)   Log Likelihood                1410.152
Date:                Thu, 06 Nov 2025   AIC                          -2794.305
Time:                        22:22:18   BIC                          -2739.386
Sample:                             0   HQIC                         -2772.764
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        874.8754      8.475    103.225      0.000     858.264     891.487
ar.L1         -1.5260      0.003   -487.551      0.000      -1.532      -1.520
ar.L2          1.0409      0.008    136.127      0.000       1.026       1.056
ar.L3          3.0890      0.006    490.915      0.000       3.077       3.101
ar.L4          0.8510      0.006    150.759      0.000       0.840       0.862
ar.L5         -1.5335      0.004   -365.778      0.000      -1.542      -1.525
ar.L6         -0.9219      0.003   -305.895      0.000      -0.928      -0.916
ma.L1          4.5909      0.027    170.531      0.000       4.538       4.644
ma.L2          8.7593      0.054    163.319      0.000       8.654       8.864
ma.L3          8.6709      0.045    192.230      0.000       8.583       8.759
ma.L4          4.4523      0.019    236.755      0.000       4.415       4.489
ma.L5          0.9493      0.006    151.144      0.000       0.937       0.962
sigma2         0.0002    1.9e-05     10.710      0.000       0.000       0.000
===================================================================================
Ljung-Box (L1) (Q):                 476.13   Jarque-Bera (JB):               126.77
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.45   Skew:                            -0.76
Prob(H) (two-sided):                  0.00   Kurtosis:                         4.93
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
In [ ]:
# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("EMD-ARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result11.png", dpi=300, bbox_inches='tight')
plt.show()
In [5]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from PyEMD import EMD
from itertools import product
from tqdm import tqdm

# Suppress warnings
warnings.filterwarnings("ignore")

# Load data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
data = df['Modal Price (Rs./kg)'].values

# EMD decomposition
emd = EMD()
emd.extrema_detection = "parabol"
imfs = emd.emd(data, max_imf=5)

# Filter meaningful IMFs
imfs = [imf for imf in imfs if np.var(imf) > 0.05 * np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")

# Combine IMFs into one reconstructed signal
reconstructed_signal = np.sum(imfs, axis=0)

# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

train_data = reconstructed_signal[:train_size]
val_data = reconstructed_signal[train_size:train_size+val_size]
test_data = reconstructed_signal[train_size+val_size:]

# Parameter grid for ARIMA (p, d, q)
param_grid = {
    'order': [(1,0,0), (1,1,0), (2,1,0), (2,1,2), (3,0,2), (5,1,0)]
}

best_score = np.inf
best_model = None
best_params = None

# Train ARIMA on combined signal
for order in tqdm(param_grid['order']):
    try:
        model = ARIMA(train_data, order=order)
        model_fit = model.fit()
        val_pred = model_fit.forecast(steps=val_size)
        mse = mean_squared_error(val_data, val_pred)
        if mse < best_score:
            best_score = mse
            best_model = model_fit
            best_params = order
    except:
        continue

print(f"\nBest ARIMA Params: {best_params} | Validation MSE: {best_score:.4f}")

# Forecast
test_predictions = best_model.forecast(steps=test_size)

# Evaluation
metrics = {
    'MSE': mean_squared_error(test_data, test_predictions),
    'RMSE': np.sqrt(mean_squared_error(test_data, test_predictions)),
    'MAE': mean_absolute_error(test_data, test_predictions),
    'MAPE': mean_absolute_percentage_error(test_data, test_predictions),
    'R²': r2_score(test_data, test_predictions)
}
print("\nFinal Evaluation Metrics:")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("EMD–ARIMA (Combined IMFs): Actual vs Predicted")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eAR_result2.png", dpi=300, bbox_inches='tight')
plt.show()

# Model summary
print(best_model.summary())
Selected 6 meaningful IMFs
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  3.32it/s]
Best ARIMA Params: (1, 0, 0) | Validation MSE: 23103.7183

Final Evaluation Metrics:
MSE: 592684.4402
RMSE: 769.8600
MAE: 684.7383
MAPE: 0.4179
R²: -1.9817
No description has been provided for this image
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  505
Model:                 ARIMA(1, 0, 0)   Log Likelihood               -3322.188
Date:                Thu, 06 Nov 2025   AIC                           6650.375
Time:                        11:23:46   BIC                           6663.049
Sample:                             0   HQIC                          6655.346
                                - 505                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        942.1786    236.343      3.986      0.000     478.955    1405.402
ar.L1          0.9550      0.014     66.751      0.000       0.927       0.983
sigma2       3.02e+04    532.743     56.682      0.000    2.92e+04    3.12e+04
===================================================================================
Ljung-Box (L1) (Q):                  11.50   Jarque-Bera (JB):             66137.94
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):              20.92   Skew:                             2.40
Prob(H) (two-sided):                  0.00   Kurtosis:                        58.86
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [3]:
# Model summary for each IMF
for imf_name, model in best_imf_models.items():
    print(f"\n{imf_name} ARIMA Model Summary:")
    print(model.summary())
    
    # Plot diagnostics
    model.plot_diagnostics(figsize=(12, 8))
    plt.suptitle(f"{imf_name} ARIMA Diagnostics", y=1.02)
    plt.tight_layout()
    plt.tight_layout()
    plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
    plt.savefig("C:/Users/marti/Desktop/png/eAR_result3.png", dpi=300, bbox_inches='tight')
    plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 2
      1 # Model summary for each IMF
----> 2 for imf_name, model in best_imf_models.items():
      3     print(f"\n{imf_name} ARIMA Model Summary:")
      4     print(model.summary())

NameError: name 'best_imf_models' is not defined
In [36]:
# --- Make sure arrays are flattened ---
test_data = np.asarray(test_data).flatten()
test_predictions = np.asarray(test_predictions).flatten()

# --- Generate DataFrame ---
results_df = pd.DataFrame({
    'Date': df.index[train_size + val_size : train_size + val_size + len(test_data)],
    'Actual': test_data,
    'Predicted': test_predictions
})

# --- Save as TSV ---
results_df.to_csv('emd_sarima_test_vs_predicted.tsv', 
                  sep='\t', 
                  index=False,
                  float_format='%.4f')

print("\n✅ File saved: 'emd_sarima_test_vs_predicted.tsv'")
✅ File saved: 'emd_sarima_test_vs_predicted.tsv'
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

=== EMD + SARIMA ===¶

=== Import Libraries ===¶

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.filterwarnings("ignore")

=== Step 1: Load and preprocess data ===¶

In [16]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [17]:
df[" Date"] = pd.to_datetime(df[" Date"])
df.set_index(" Date", inplace=True)
In [20]:
df["Modal Price (Rs./Kg)"]= df["Modal Price (Rs./Quintal)"]/100
In [22]:
df = df[["Modal Price (Rs./Kg)"]].dropna()
df.rename(columns={"Modal Price (Rs./Kg)": "Price"}, inplace=True)
price_values = df['Price'].values
In [24]:
Date=df.index

=== Plot Original Series ===¶

In [23]:
plt.figure(figsize=(12, 6))
plt.plot(df.index, price_values)
plt.title('Original Cardamom Price Time Series')
plt.xlabel('Date')
plt.ylabel('Price')
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result1.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image

=== Apply EMD ===¶

In [26]:
emd = EMD()
imfs = emd(price_values)
selected_imf_indices = list(range(3,6 ))
selected_imfs = imfs[selected_imf_indices]

=== Visualize IMFs ===¶

In [29]:
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, price_values, 'r')
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
    plt.subplot(len(imfs)+1, 1, i+2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i+1}")
    plt.grid()
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result3.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image

=== Split Data ===¶

In [18]:
total_size = len(df)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
train_val_idx = slice(0, train_size + val_size)
test_idx = slice(train_size + val_size, total_size)

=== Step 10: Plot Full Data Split ===¶

=== SARIMA Hyperparameter Tuning ===¶

In [35]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import numpy as np

Define SARIMA Hyperparameter Options¶

In [38]:
p_values = [0, 1, 2,3,4,5,6]
d_values = [0, 1]
q_values = [0, 1, 2,3,4,5]
P_values = [0, 1,2,3,4,5]
D_values = [0, 1]
Q_values = [0, 1,2,3,4,5]
m = 26  # seasonal period
param_grid = {
    'p': p_values,
    'd': d_values,
    'q': q_values,
    'P': P_values,
    'D': D_values,
    'Q': Q_values
}

Initialize empty lists¶

In [41]:
imf_predictions = []
best_params_summary = []

Select IMFs (Example: IMF 3 to IMF 6)¶

In [44]:
selected_imfs = imfs
from tqdm import tqdm  # Add this import at the top with other imports
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit

Start tuning¶

In [47]:
# Reconstruction and evaluation
reconstructed_forecast = np.sum(imf_predictions, axis=0)
actual = df['Price'].values[train_size + val_size:]
forecast_index = df.index[train_size + val_size:]

# Ensure arrays are NumPy and 1D
actual = np.asarray(actual).flatten()
reconstructed_forecast = np.asarray(reconstructed_forecast).flatten()

# Core Metrics
final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast))
final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100
final_mae = mean_absolute_error(actual, reconstructed_forecast)
final_r2 = r2_score(actual, reconstructed_forecast)

# Directional Accuracy
if len(actual) > 1 and len(reconstructed_forecast) > 1:
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(reconstructed_forecast))
    final_da = np.mean(actual_diff == forecast_diff) * 100 if len(actual_diff) == len(forecast_diff) else np.nan
else:
    final_da = np.nan

# Print Results
print(f"\n🎯 Final Reconstructed Forecast Evaluation:")
print(f"RMSE: {final_rmse:.2f}")
print(f"MAPE: {final_mape:.2f}%")
print(f"MAE: {final_mae:.2f}")
print(f"R²: {final_r2:.4f}")
print(f"Directional Accuracy: {final_da:.2f}%" if not np.isnan(final_da) else "Directional Accuracy: N/A")

# Plot results
plt.figure(figsize=(14, 6))
plt.plot(df.index[:train_size], df['Price'].values[:train_size], label="Train", color='green')
plt.plot(df.index[train_size:train_size + val_size], df['Price'].values[train_size:train_size + val_size], label="Validation", color='orange')
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (EMD-SARIMA)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (EMD-SARIMA)\nSelected IMFs: 2–6 | RMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[47], line 11
      8 reconstructed_forecast = np.asarray(reconstructed_forecast).flatten()
     10 # Core Metrics
---> 11 final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast))
     12 final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100
     13 final_mae = mean_absolute_error(actual, reconstructed_forecast)

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\_regression.py:497, in mean_squared_error(y_true, y_pred, sample_weight, multioutput, squared)
    492     if not squared:
    493         return root_mean_squared_error(
    494             y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput
    495         )
--> 497 y_type, y_true, y_pred, multioutput = _check_reg_targets(
    498     y_true, y_pred, multioutput
    499 )
    500 check_consistent_length(y_true, y_pred, sample_weight)
    501 output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)

File ~\anaconda3\Lib\site-packages\sklearn\metrics\_regression.py:102, in _check_reg_targets(y_true, y_pred, multioutput, dtype)
     68 def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
     69     """Check that y_true and y_pred belong to the same regression task.
     70 
     71     Parameters
   (...)
    100         correct keyword.
    101     """
--> 102     check_consistent_length(y_true, y_pred)
    103     y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
    104     y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)

File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:457, in check_consistent_length(*arrays)
    455 uniques = np.unique(lengths)
    456 if len(uniques) > 1:
--> 457     raise ValueError(
    458         "Found input variables with inconsistent numbers of samples: %r"
    459         % [int(l) for l in lengths]
    460     )

ValueError: Found input variables with inconsistent numbers of samples: [0, 1]
In [ ]:
# Plot results
plt.figure(figsize=(14, 6))


plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (EMD-SARIMA)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (EMD-SARIMA)\nSelected IMFs: 2–6 | RMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
In [ ]:
for imf_index, imf in enumerate(selected_imfs, start=2):
    print(f"🔍 Tuning SARIMA for IMF {imf_index}")
    best_score = float('inf')
    best_params = None
    best_forecast = None

    tscv = TimeSeriesSplit(n_splits=3)

    for params in ParameterGrid(param_grid):
        fold_losses = []

        for train_idx, val_idx in tscv.split(imf):
            train_series = imf[train_idx]
            val_series = imf[val_idx]

            try:
                model = SARIMAX(
                    train_series,
                    order=(params['p'], params['d'], params['q']),
                    seasonal_order=(params['P'], params['D'], params['Q'], m),
                    enforce_stationarity=False,
                    enforce_invertibility=False
                )
                model_fit = model.fit(disp=False)

                val_forecast = model_fit.forecast(steps=len(val_series))
                score = mean_squared_error(val_series, val_forecast)
                fold_losses.append(score)
            except:
                continue  # skip bad models

        if fold_losses:
            avg_loss = np.mean(fold_losses)

            if avg_loss < best_score:
                best_score = avg_loss
                best_params = params

                try:
                    full_model = SARIMAX(
                        imf,
                        order=(params['p'], params['d'], params['q']),
                        seasonal_order=(params['P'], params['D'], params['Q'], m),
                        enforce_stationarity=False,
                        enforce_invertibility=False
                    )
                    full_model_fit = full_model.fit(disp=False)
                    forecast = full_model_fit.forecast(steps=test_size)  # Define test_size before
                    best_forecast = forecast
                except:
                    continue

    print(f"✅ Best params for IMF {imf_index}: {best_params} with MSE={best_score:.4f}")
    best_params_summary.append({
        "IMF": imf_index,
        "p": best_params['p'],
        "d": best_params['d'],
        "q": best_params['q'],
        "P": best_params['P'],
        "D": best_params['D'],
        "Q": best_params['Q'],
        "m": m,
        "Validation MSE": best_score
    })
    imf_predictions.append(best_forecast)
🔍 Tuning SARIMA for IMF 2

=== Fit SARIMA on Selected IMFs ===¶

In [ ]:
# Initialize summary list
full_model_summaries = []

# Loop over selected IMFs and corresponding best parameters
for i, (imf, summary) in enumerate(zip(selected_imfs, best_params_summary)):
    imf_index = summary["IMF"]
    params = summary

    # Separate train and test
    train_data = imf[:train_size + val_size]
    test_data = imf[train_size + val_size:]

    # Refit best SARIMA
    model = SARIMAX(
        train_data,
        order=(params['p'], params['d'], params['q']),
        seasonal_order=(params['P'], params['D'], params['Q'], m),
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    model_fit = model.fit(disp=False)

=== Reconstruct Final Forecast ===¶

In [ ]:
reconstructed_forecast = np.sum(imf_predictions, axis=0)

=== Step 4: Get actual test values ===¶

In [ ]:
actual = df['Price'].values[train_size + val_size:]
forecast_index = df.index[train_size + val_size:]

=== Evaluation ===¶

In [ ]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Ensure arrays are NumPy and 1D
actual = np.asarray(actual).flatten()
reconstructed_forecast = np.asarray(reconstructed_forecast).flatten()

# Core Metrics
final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast))
final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100
final_mae = mean_absolute_error(actual, reconstructed_forecast)
final_r2 = r2_score(actual, reconstructed_forecast)

# Directional Accuracy
if len(actual) > 1 and len(reconstructed_forecast) > 1:
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(reconstructed_forecast))

    if len(actual_diff) == len(forecast_diff):
        final_da = np.mean(actual_diff == forecast_diff) * 100
    else:
        final_da = np.nan
else:
    final_da = np.nan

# Print Results
print(f"\n🎯 Final Reconstructed Forecast Evaluation:")
print(f"RMSE: {final_rmse:.2f}")
print(f"MAPE: {final_mape:.2f}%")
print(f"MAE: {final_mae:.2f}")
print(f"R²: {final_r2:.4f}")
print(f"Directional Accuracy: {final_da:.2f}%" if not np.isnan(final_da) else "Directional Accuracy: N/A (insufficient data)")

=== Plot Forecast vs Actual ===¶

In [ ]:
plt.figure(figsize=(14, 6))
plt.plot(df.index[:train_size], df['Price'].values[:train_size], label="Train", color='green')
plt.plot(df.index[train_size:train_size + val_size], df['Price'].values[train_size:train_size + val_size], label="Validation", color='orange')
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (SARIMA + EMD)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (SARIMA + EMD)\nRMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result5.png", dpi=300, bbox_inches='tight')
plt.show()
In [ ]:
plt.figure(figsize=(12, 6))
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (SARIMA + EMD)", linestyle='--', color='blue')
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result6.png", dpi=300, bbox_inches='tight')
plt.show()
In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from PyEMD import EMD
from itertools import product
from joblib import Parallel, delayed
from tqdm import tqdm

# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

# EMD Decomposition with reduced components
print("Performing EMD decomposition...")
emd = EMD()
emd.extrema_detection = "parabol"
imfs = emd.emd(data, max_imf=5)  # Limit to 5 IMFs

# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")

# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r')
plt.title("Original Time Series")
plt.grid()

for i, imf in enumerate(imfs):
    plt.subplot(len(imfs)+1, 1, i+2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
    plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esAR_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# Data splitting with proper variable definitions
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:]  # This was missing in original code

# Focused SARIMA parameter grid
param_grid = {
    'order': [(2,1,0),(5,1,0),(3,0,2),(2,0,3)],
    'seasonal_order': [(0,1,1,26), (1,1,1,26), (0,1,0,26),(2,0,1,26),(2,0,2,26),(2,0,0,26)]
}

best_imf_models = {}

def train_sarima(imf, order, seasonal_order, train_size, val_size):
    try:
        model = SARIMAX(imf[:train_size],
                       order=order,
                       seasonal_order=seasonal_order,
                       enforce_stationarity=False,
                       enforce_invertibility=False)
        model_fit = model.fit(disp=False)
        val_pred = model_fit.forecast(steps=val_size)
        return mean_squared_error(imf[train_size:train_size+val_size], val_pred), model_fit
    except:
        return np.inf, None

# Parallel training for each IMF
for i, imf in enumerate(imfs, start=1):
    print(f"\nTraining SARIMA for IMF {i} (Variance: {np.var(imf):.2f})")
    
    results = Parallel(n_jobs=-1)(
        delayed(train_sarima)(imf, order, seasonal_order, train_size, val_size)
        for order, seasonal_order in tqdm(product(param_grid['order'], param_grid['seasonal_order']),
                                        total=len(param_grid['order'])*len(param_grid['seasonal_order']))
    )
    
    scores, models = zip(*results)
    best_idx = np.argmin(scores)
    best_imf_models[f'IMF_{i}'] = models[best_idx]
    
    if models[best_idx]:
        print(f"Best params: {models[best_idx].model.order}x{models[best_idx].model.seasonal_order} | MSE: {scores[best_idx]:.4f}")

# Reconstruct and evaluate
test_predictions = np.zeros(test_size)
for imf_name, model in best_imf_models.items():
    if model:  # Only use valid models
        test_predictions += model.forecast(steps=test_size)

# Evaluation metrics
metrics = {
    'MSE': mean_squared_error(test_data, test_predictions),
    'RMSE': np.sqrt(mean_squared_error(test_data, test_predictions)),
    'MAE': mean_absolute_error(test_data, test_predictions),
    'MAPE': mean_absolute_percentage_error(test_data, test_predictions),
    'R²': r2_score(test_data, test_predictions)
}

print("\nFinal Evaluation Metrics:")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")
# Extract the actual test values and the predictions
y_actual = test_data
y_pred = test_predictions

# Ensure they are the same length (a good practice)
min_length = min(len(y_actual), len(y_pred))
y_actual = y_actual[:min_length]
y_pred = y_pred[:min_length]

# Standard Regression Metrics
mse = mean_squared_error(y_actual, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_actual, y_pred)
mape = mean_absolute_percentage_error(y_actual, y_pred)
r2 = r2_score(y_actual, y_pred)

# --- Calculate Directional Accuracy (DA) ---
# Calculate the actual change and the predicted change between consecutive points
actual_changes = np.diff(y_actual)  # e.g., [y1-y0, y2-y1, ...]
predicted_changes = np.diff(y_pred) # e.g., [y_hat1-y_hat0, y_hat2-y_hat1, ...]

# Check if the direction (sign) of the change is the same
correct_direction = np.sign(actual_changes) == np.sign(predicted_changes)

# Calculate the percentage of correct directions
da = (np.sum(correct_direction) / len(actual_changes)) * 100
# Plot results
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("Optimized EMD-SARIMA: Actual vs Predicted")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esAR_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Print all metrics, including DA
print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}%")
print(f"R²: {r2:.4f}")
print(f"Directional Accuracy (DA): {da:.2f}%")
# Plot results
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("Optimized EMD-SARIMA: Actual vs Predicted")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esAR_result3.png", dpi=300, bbox_inches='tight')
plt.show()

# Model diagnostics
for imf_name, model in best_imf_models.items():
    if model:
        print(f"\n{imf_name} Model Summary:")
        print(model.summary())
        model.plot_diagnostics(figsize=(12, 8))
        plt.suptitle(f"{imf_name} Diagnostics", y=1.02)
        plt.tight_layout()
        plt.savefig("C:/Users/marti/Desktop/png/esAR_result4.png", dpi=300, bbox_inches='tight')
        plt.show()
Performing EMD decomposition...
Selected 6 meaningful IMFs
No description has been provided for this image
Training SARIMA for IMF 1 (Variance: 55233660341437862295686600458240.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:09<00:00,  2.42it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 363553755381698150724496523264.0000

Training SARIMA for IMF 2 (Variance: 49912052842838056400575845629952.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 14.21it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 59775053773018410379879907328.0000

Training SARIMA for IMF 3 (Variance: 449418934480625974407117930496.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 12.01it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 69933874802371244797865230336.0000

Training SARIMA for IMF 4 (Variance: 434742580397082004974810955776.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00,  8.81it/s]
Best params: (5, 1, 0)x(2, 0, 0, 26) | MSE: 14234279385445683335121600512.0000

Training SARIMA for IMF 5 (Variance: 74837305360678816574282399744.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00, 10.68it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 39097127682665127677526016.0000

Training SARIMA for IMF 6 (Variance: 1443827991893432784687988736.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:04<00:00,  5.30it/s]
Best params: (2, 1, 0)x(0, 1, 0, 26) | MSE: 168209.9778

Final Evaluation Metrics:
MSE: 110808286918274838905162301440.0000
RMSE: 332878787125696.6250
MAE: 283766135451053.4375
MAPE: 191852526819.0257
R²: -557451738620487155056640.0000
No description has been provided for this image
Final Evaluation Metrics:
MSE: 110808286918274838905162301440.0000
RMSE: 332878787125696.6250
MAE: 283766135451053.4375
MAPE: 191852526819.0257%
R²: -557451738620487155056640.0000
Directional Accuracy (DA): 21.30%
No description has been provided for this image
IMF_1 Model Summary:
                                      SARIMAX Results                                      
===========================================================================================
Dep. Variable:                                   y   No. Observations:                  505
Model:             SARIMAX(3, 0, 2)x(2, 0, [], 26)   Log Likelihood              -15480.155
Date:                             Thu, 06 Nov 2025   AIC                          30976.310
Time:                                     11:47:57   BIC                          31009.184
Sample:                                          0   HQIC                         30989.267
                                             - 505                                         
Covariance Type:                               opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          2.6141      0.036     73.199      0.000       2.544       2.684
ar.L2         -2.3666      0.070    -33.895      0.000      -2.503      -2.230
ar.L3          0.7460      0.036     20.552      0.000       0.675       0.817
ma.L1          0.8043      0.048     16.922      0.000       0.711       0.897
ma.L2         -0.0875      0.051     -1.717      0.086      -0.187       0.012
ar.S.L26      -0.0085      0.081     -0.104      0.917      -0.168       0.151
ar.S.L52      -0.0035      0.191     -0.018      0.985      -0.377       0.370
sigma2      4.565e+28        nan        nan        nan         nan         nan
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):            401456.37
Prob(Q):                              1.00   Prob(JB):                         0.00
Heteroskedasticity (H):             586.01   Skew:                            -7.62
Prob(H) (two-sided):                  0.00   Kurtosis:                       148.53
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 3.31e+71. Standard errors may be unstable.
No description has been provided for this image
IMF_2 Model Summary:
                                      SARIMAX Results                                      
===========================================================================================
Dep. Variable:                                   y   No. Observations:                  505
Model:             SARIMAX(3, 0, 2)x(2, 0, [], 26)   Log Likelihood              -15478.854
Date:                             Thu, 06 Nov 2025   AIC                          30973.708
Time:                                     11:48:00   BIC                          31006.582
Sample:                                          0   HQIC                         30986.665
                                             - 505                                         
Covariance Type:                               opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          2.6083      0.036     72.740      0.000       2.538       2.679
ar.L2         -2.3574      0.070    -33.581      0.000      -2.495      -2.220
ar.L3          0.7418      0.037     20.260      0.000       0.670       0.814
ma.L1          0.8041      0.047     17.091      0.000       0.712       0.896
ma.L2         -0.0877      0.050     -1.738      0.082      -0.187       0.011
ar.S.L26      -0.0084      0.083     -0.101      0.919      -0.171       0.154
ar.S.L52      -0.0042      0.180     -0.023      0.982      -0.357       0.349
sigma2      4.537e+28        nan        nan        nan         nan         nan
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):            397867.33
Prob(Q):                              0.99   Prob(JB):                         0.00
Heteroskedasticity (H):             584.81   Skew:                             7.57
Prob(H) (two-sided):                  0.00   Kurtosis:                       147.88
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.36e+72. Standard errors may be unstable.
No description has been provided for this image
IMF_3 Model Summary:
                                      SARIMAX Results                                      
===========================================================================================
Dep. Variable:                                   y   No. Observations:                  505
Model:             SARIMAX(3, 0, 2)x(2, 0, [], 26)   Log Likelihood              -12658.025
Date:                             Thu, 06 Nov 2025   AIC                          25332.050
Time:                                     11:48:03   BIC                          25364.924
Sample:                                          0   HQIC                         25345.007
                                             - 505                                         
Covariance Type:                               opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          2.9649      0.025    116.462      0.000       2.915       3.015
ar.L2         -2.9416      0.050    -58.336      0.000      -3.040      -2.843
ar.L3          0.9766      0.025     38.567      0.000       0.927       1.026
ma.L1          1.2987      0.056     23.333      0.000       1.190       1.408
ma.L2          0.7186      0.059     12.282      0.000       0.604       0.833
ar.S.L26       0.0307      0.116      0.264      0.792      -0.197       0.258
ar.S.L52      -0.0004      0.007     -0.053      0.958      -0.014       0.013
sigma2      3.184e+23   2.13e-25    1.5e+48      0.000    3.18e+23    3.18e+23
===================================================================================
Ljung-Box (L1) (Q):                  51.02   Jarque-Bera (JB):              1287.00
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):              11.25   Skew:                            -0.25
Prob(H) (two-sided):                  0.00   Kurtosis:                        11.27
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 1.13e+63. Standard errors may be unstable.
No description has been provided for this image
IMF_4 Model Summary:
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                  505
Model:             SARIMAX(5, 1, 0)x(2, 0, 0, 26)   Log Likelihood              -10564.602
Date:                            Thu, 06 Nov 2025   AIC                          21145.204
Time:                                    11:48:06   BIC                          21178.024
Sample:                                         0   HQIC                         21158.143
                                            - 505                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          3.5023      0.019    180.791      0.000       3.464       3.540
ar.L2         -4.8957      0.066    -73.718      0.000      -5.026      -4.766
ar.L3          3.6338      0.096     38.037      0.000       3.447       3.821
ar.L4         -1.5914      0.071    -22.401      0.000      -1.731      -1.452
ar.L5          0.3509      0.022     16.053      0.000       0.308       0.394
ar.S.L26       0.0490      0.067      0.730      0.465      -0.083       0.181
ar.S.L52       0.0300      0.049      0.610      0.542      -0.066       0.126
sigma2      1.643e+19   3.19e-21   5.16e+39      0.000    1.64e+19    1.64e+19
===================================================================================
Ljung-Box (L1) (Q):                  24.06   Jarque-Bera (JB):              7184.28
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.50   Skew:                            -0.43
Prob(H) (two-sided):                  0.00   Kurtosis:                        22.62
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 9.57e+54. Standard errors may be unstable.
No description has been provided for this image
IMF_5 Model Summary:
                                      SARIMAX Results                                      
===========================================================================================
Dep. Variable:                                   y   No. Observations:                  505
Model:             SARIMAX(3, 0, 2)x(2, 0, [], 26)   Log Likelihood              -10887.006
Date:                             Thu, 06 Nov 2025   AIC                          21790.011
Time:                                     11:48:09   BIC                          21822.885
Sample:                                          0   HQIC                         21802.968
                                             - 505                                         
Covariance Type:                               opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          2.9424      0.001   3126.970      0.000       2.941       2.944
ar.L2         -2.8849      0.002  -1535.284      0.000      -2.889      -2.881
ar.L3          0.9425      0.001   1004.395      0.000       0.941       0.944
ma.L1      -7.204e-05    1.2e-06    -60.172      0.000   -7.44e-05   -6.97e-05
ma.L2      -2.191e-05   3.84e-07    -57.043      0.000   -2.27e-05   -2.12e-05
ar.S.L26       1.7915      0.010    175.240      0.000       1.771       1.812
ar.S.L52      -0.6243      0.010    -61.792      0.000      -0.644      -0.605
sigma2      1.056e+17    2.4e-19    4.4e+35      0.000    1.06e+17    1.06e+17
===================================================================================
Ljung-Box (L1) (Q):                 391.05   Jarque-Bera (JB):                52.78
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.75   Skew:                            -0.05
Prob(H) (two-sided):                  0.09   Kurtosis:                         4.68
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 1.64e+49. Standard errors may be unstable.
No description has been provided for this image
IMF_6 Model Summary:
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                  505
Model:             SARIMAX(2, 1, 0)x(0, 1, 0, 26)   Log Likelihood                 166.098
Date:                            Thu, 06 Nov 2025   AIC                           -326.197
Time:                                    11:48:12   BIC                           -313.701
Sample:                                         0   HQIC                          -321.283
                                            - 505                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          2.0000   8.25e-13   2.42e+12      0.000       2.000       2.000
ar.L2         -1.0000   9.72e-13  -1.03e+12      0.000      -1.000      -1.000
sigma2         0.0288   3.53e-23   8.17e+20      0.000       0.029       0.029
===================================================================================
Ljung-Box (L1) (Q):                 230.74   Jarque-Bera (JB):                 0.35
Prob(Q):                              0.00   Prob(JB):                         0.84
Heteroskedasticity (H):               0.52   Skew:                            -0.03
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.12
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 8.02e+36. Standard errors may be unstable.
No description has been provided for this image

=== EMD + LSTM===¶

In [98]:
pip install EMD-signal
Requirement already satisfied: EMD-signal in c:\users\marti\anaconda3\lib\site-packages (1.6.4)
Requirement already satisfied: numpy>=1.12 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (1.26.4)
Requirement already satisfied: scipy>=0.19 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (1.13.1)
Requirement already satisfied: pathos>=0.2.1 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (0.3.3)
Requirement already satisfied: tqdm<5.0,>=4.64.0 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (4.66.4)
Requirement already satisfied: ppft>=1.7.6.9 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (1.7.6.9)
Requirement already satisfied: dill>=0.3.9 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (0.3.9)
Requirement already satisfied: pox>=0.3.5 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (0.3.5)
Requirement already satisfied: multiprocess>=0.70.17 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (0.70.17)
Requirement already satisfied: colorama in c:\users\marti\appdata\roaming\python\python312\site-packages (from tqdm<5.0,>=4.64.0->EMD-signal) (0.4.6)
Note: you may need to restart the kernel to use updated packages.

=== Loading Libraries ===¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
In [2]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [3]:
df.head()
Out[3]:
State Name District Name Market Name Variety Group Arrivals (Tonnes) Min Price (Rs./Quintal) Max Price (Rs./Quintal) Modal Price (Rs./Quintal) Date
0 Kerala Idukki Nedumkandam Other Spices 14.0 120000 150000 130000 2011-01-16
1 Kerala Idukki Nedumkandam Other Spices 17.0 120000 150000 140000 2011-01-23
2 Kerala Idukki Nedumkandam Other Spices 12.0 120000 150000 130000 2011-01-30
3 Kerala Idukki Nedumkandam Other Spices 8.5 120000 150000 125000 2011-02-06
4 Kerala Idukki Nedumkandam Other Spices 9.2 100000 115000 107500 2011-02-13
In [4]:
df[" Date"] = pd.to_datetime(df[" Date"])
df.set_index(" Date", inplace=True)
In [5]:
df["Modal Price (Rs./kg)"]=df["Modal Price (Rs./Quintal)"]/100
In [6]:
price_values = df["Modal Price (Rs./kg)"].values

=== Step 1.5: Normalize Original Price Series ===¶

In [8]:
# Extract price values and normalize them
price_values = df["Modal Price (Rs./kg)"].values
scaler_total = MinMaxScaler()
scaled_data = scaler_total.fit_transform(price_values.reshape(-1, 1)).flatten()

=== Step 2: Apply EMD on Normalized Data ===¶

In [10]:
emd = EMD()
imfs = emd(scaled_data)

=== Step 3: Visualize IMFs ===¶

In [12]:
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs) + 1, 1, 1)
plt.plot(df.index, scaled_data, 'r')
plt.title("Normalized Time Series")
plt.grid()

for i, imf in enumerate(imfs):
    plt.subplot(len(imfs) + 1, 1, i + 2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i + 1}")
    plt.grid()

plt.tight_layout()
plt.show()

Step 3: Select Relevant IMFs (e.g., first 3 IMFs)¶

In [14]:
selected_imfs = imfs

=== Step 4: Train/Val/Test Split ===¶

In [16]:
total_size = len(df)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
test_idx = slice(train_size + val_size, total_size)

=== Step 5: LSTM Helper Function ===¶

In [18]:
# Function to create sequences
def create_sequences(data, look_back):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:i + look_back])
        y.append(data[i + look_back])
    return np.array(X), np.array(y)

=== Step 6: Train LSTM on each IMF with TSCV and Hyperparameter Tuning ===¶

In [20]:
from sklearn.model_selection import TimeSeriesSplit, ParameterGrid
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

1. Hyperparameter grid¶

In [22]:
# Sample parameter grids
look_back_options = [3, 5]
lstm_units_options = [32, 50]
epochs_options = [30, 50]
batch_size = 16

2. Initialize result storage¶

In [26]:
# Placeholder for results
best_params_summary = []
imf_predictions = []

3. Select IMF 3 to 6 (Python index 2 to 5)¶

In [34]:
selected_imfs = imfs

Start tuning¶

In [ ]:
for imf_index, imf in enumerate(selected_imfs, start=1):
    print(f"🔍 Tuning LSTM for IMF {imf_index}")
    best_score = float('inf')
    best_params = None
    best_forecast = None

    param_grid = {
        'look_back': look_back_options,
        'lstm_units': lstm_units_options,
        'epochs': epochs_options
    }

    tscv = TimeSeriesSplit(n_splits=3)

    for params in ParameterGrid(param_grid):
        fold_losses = []

        for train_idx, val_idx in tscv.split(imf):
            scaler = MinMaxScaler()
            scaled_imf = scaler.fit_transform(imf.reshape(-1, 1)).flatten()

            train_series = scaled_imf[train_idx]
            val_series = scaled_imf[val_idx[0]-params['look_back']:val_idx[-1]+1]

            X_train, y_train = create_sequences(train_series, params['look_back'])
            X_val, y_val = create_sequences(val_series, params['look_back'])

            X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
            X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))

            model = Sequential()
            model.add(LSTM(params['lstm_units'], activation='relu', input_shape=(params['look_back'], 1)))
            model.add(Dense(1))
            model.compile(optimizer='adam', loss='mse')

            es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
            model.fit(X_train, y_train, epochs=params['epochs'], batch_size=batch_size, verbose=0, validation_data=(X_val, y_val), callbacks=[es])

            val_pred_scaled = model.predict(X_val)
            val_pred = scaler.inverse_transform(val_pred_scaled).flatten()
            actual_val = scaler.inverse_transform(y_val.reshape(-1, 1)).flatten()
            score = mean_squared_error(actual_val, val_pred)
            fold_losses.append(score)

        avg_loss = np.mean(fold_losses)
        if avg_loss < best_score:
            best_score = avg_loss
            best_params = params

            scaler = MinMaxScaler()
            scaled_imf = scaler.fit_transform(imf.reshape(-1, 1)).flatten()

            train_series = scaled_imf[:train_size + val_size]
            X_train, y_train = create_sequences(train_series, params['look_back'])
            X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

            test_series = scaled_imf[train_size + val_size - params['look_back']:]
            X_test, _ = create_sequences(test_series, params['look_back'])
            X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

            model = Sequential()
            model.add(LSTM(params['lstm_units'], activation='relu', input_shape=(params['look_back'], 1)))
            model.add(Dense(1))
            model.compile(optimizer='adam', loss='mse')
            model.fit(X_train, y_train, epochs=params['epochs'], batch_size=batch_size, verbose=0)

            forecast_scaled = model.predict(X_test)
            best_forecast = scaler.inverse_transform(forecast_scaled).flatten()

    print(f"✅ Best params for IMF {imf_index}: {best_params} with MSE={best_score:.4f}")
    best_params_summary.append({
        "IMF": imf_index,
        "Look_back": best_params['look_back'],
        "Units": best_params['lstm_units'],
        "Epochs": best_params['epochs'],
        "Validation MSE": best_score
    })
    imf_predictions.append(best_forecast)
🔍 Tuning LSTM for IMF 1
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
WARNING:tensorflow:5 out of the last 11 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x0000021621CAB2E0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
1/5 ━━━━━━━━━━━━━━━━━━━━ 0s 144ms/stepWARNING:tensorflow:5 out of the last 11 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x0000021621CAB2E0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 75ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 70ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 66ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 71ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 73ms/step
✅ Best params for IMF 1: {'epochs': 50, 'look_back': 5, 'lstm_units': 50} with MSE=0.0029
🔍 Tuning LSTM for IMF 2
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 32ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 71ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
In [ ]:
# Print final model summary
print("\n🏆 Final Model Summary 🏆")
print("="*50)
print(f"{'IMF':<8}{'Look Back':<12}{'LSTM Units':<12}{'Epochs':<10}{'Validation MSE':<15}")
print("-"*50)
for summary in best_params_summary:
    print(f"{summary['IMF']:<8}{summary['Look_back']:<12}{summary['Units']:<12}{summary['Epochs']:<10}{summary['Validation MSE']:.4f}")
print("="*50)

# Calculate and print overall performance
total_mse = sum(item['Validation MSE'] for item in best_params_summary)
avg_mse = total_mse / len(best_params_summary)
print(f"\n📊 Overall Performance:")
print(f"  - Total Validation MSE across all IMFs: {total_mse:.4f}")
print(f"  - Average Validation MSE per IMF: {avg_mse:.4f}")

# Print final recommendations
print("\n💡 Recommendations:")
print("  - Best performing IMF components (lowest MSE):")
sorted_imfs = sorted(best_params_summary, key=lambda x: x['Validation MSE'])
for i, imf in enumerate(sorted_imfs[:3], 1):
    print(f"    {i}. IMF {imf['IMF']} (MSE: {imf['Validation MSE']:.4f})")
print("  - Consider focusing on these components for further optimization")
print("  - Higher MSE components may need different architecture or preprocessing")

=== Step 7: Reconstruct Final Forecast from IMFs ===¶

In [ ]:
reconstructed_scaled_forecast = np.sum(imf_predictions, axis=0)
reconstructed_forecast = scaler_total.inverse_transform(reconstructed_scaled_forecast.reshape(-1, 1)).flatten()
actual = df['Modal Price (Rs./kg)'].values[test_idx][:len(reconstructed_forecast)]
forecast_index = df.index[test_idx][:len(reconstructed_forecast)]
In [ ]:
reconstructed_forecast
In [ ]:
actual

=== Step 8: Evaluation ===¶

In [ ]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pandas as pd

# Define datetime index
da = forecast_index

# Ensure arrays
actual = np.array(actual)
forecast = np.array(reconstructed_forecast)

# Evaluation metrics
epsilon = 1e-10
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = np.mean(np.abs((actual - forecast) / (actual + epsilon))) * 100
smape = 100 * np.mean(2 * np.abs(actual - forecast) / (np.abs(actual) + np.abs(forecast) + epsilon))
r2 = r2_score(actual, forecast)

# Directional Accuracy
actual_diff = np.diff(actual)
forecast_diff = np.diff(forecast)
direction_matches = np.sign(actual_diff) == np.sign(forecast_diff)
directional_accuracy = np.mean(direction_matches) * 100

# Create results DataFrame with datetime index
df_results = pd.DataFrame({
    'Date': da,
    'Actual': actual,
    'Forecast': forecast
})
df_results.set_index('Date', inplace=True)

# Display sample
print(df_results.head())

# Print metrics
print("\n" + "="*50)
print("MODEL PERFORMANCE METRICS".center(50))
print("="*50)
print(f"{'Samples Evaluated':<35}: {len(actual)}")
print(f"{'Mean Squared Error (MSE)':<35}: {mse:.2f}")
print(f"{'Root Mean Squared Error (RMSE)':<35}: {rmse:.2f}")
print(f"{'Mean Absolute Error (MAE)':<35}: {mae:.2f}")
print(f"{'Mean Absolute Percentage Error (MAPE)':<35}: {mape:.2f}%")
print(f"{'Symmetric MAPE (sMAPE)':<35}: {smape:.2f}%")
print(f"{'R-squared (R²)':<35}: {r2:.4f}")
print(f"{'Directional Accuracy (DA)':<35}: {directional_accuracy:.2f}%")
print("="*50)

=== Step 9: Plot Forecast vs Actual ===¶

In [ ]:
plt.figure(figsize=(14, 6))
plt.plot(df.index[:train_size], df['Price'].values[:train_size], label="Train", color='green')
plt.plot(df.index[train_size:train_size + val_size], df['Price'].values[train_size:train_size + val_size], label="Validation", color='orange')
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (LSTM + EMD)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (SARIMA + EMD)\nRMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
In [38]:
plt.figure(figsize=(14, 6))

plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (LSTM + EMD)", linestyle='--', color='blue')
Out[38]:
[<matplotlib.lines.Line2D at 0x240c31cfc50>]
No description has been provided for this image
In [26]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout  # Changed GRU to LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf

# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

print(f"Original data length: {len(data)}")

# --- Step 1: EMD Decomposition ---
print("Performing EMD decomposition...")
emd = EMD()
imfs = emd.emd(data, max_imf=5)  # Extract up to 5 IMFs

# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r', linewidth=2)
plt.title("Original Time Series")
plt.grid()

for i, imf in enumerate(imfs):
    plt.subplot(len(imfs)+1, 1, i+2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
    plt.grid()

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result2.png", dpi=300, bbox_inches='tight')
plt.show()

# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")

# Reconstruct signal from selected IMFs
reconstructed_data = np.sum(imfs, axis=0)

# Plot original vs reconstructed data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', linewidth=2)
plt.title("Original vs EMD-Reconstructed Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 2: Data Preparation for LSTM ---
# Use reconstructed data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(reconstructed_data.reshape(-1, 1))

# Create sequences for LSTM
def create_sequences(data, lookback=52):
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])
        y.append(data[i])
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)

# Reshape for LSTM [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 3: Hyperparameter Tuning for LSTM ---
def build_model(hp):
    model = Sequential()
    
    # Number of LSTM layers (changed from GRU to LSTM)
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(LSTM(  # Changed GRU to LSTM
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers with ReLU activation
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

print("\nStarting LSTM hyperparameter tuning...")
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=2,
    directory='emd_lstm_tuning',  # Changed directory name
    project_name='cardamom_emd_lstm'  # Changed project name
)

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}")  # Changed from GRU to LSTM
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}")  # Changed from GRU to LSTM
    print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")  # Changed from GRU to LSTM

# --- Step 4: Build and Train Final LSTM Model ---
final_model = tuner.hypermodel.build(best_hp)

print("\nTraining final EMD-LSTM model...")  # Changed to LSTM
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()

# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_reconstructed = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()

# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    mape = mean_absolute_percentage_error(actual, forecast)
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
    
    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R²': r2,
        'Directional Accuracy': da
    }

# Evaluate on reconstructed data
metrics_reconstructed = evaluate_forecast(y_actual_reconstructed, y_pred)

# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)

print("\n" + "="*60)
print("EMD-LSTM MODEL TRAINING SUMMARY")  # Changed to LSTM
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Number of IMFs used: {len(imfs)}")
print("\nLSTM Model Architecture:")  # Changed to LSTM
final_model.summary()

print("\n" + "="*60)
print("EVALUATION ON RECONSTRUCTED DATA")
print("="*60)
for metric, value in metrics_reconstructed.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]

plt.figure(figsize=(18, 12))

# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('EMD-LSTM Training History')  # Changed to LSTM
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Reconstructed vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='LSTM Forecast', color='red', linestyle='--', linewidth=2)  # Changed to LSTM
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs EMD-Reconstructed vs LSTM Forecast')  # Changed to LSTM
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (LSTM)', color='red', linestyle='--', linewidth=2)  # Changed to LSTM
plt.fill_between(test_dates, 
                 y_pred - metrics_original['RMSE'], 
                 y_pred + metrics_original['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - EMD-LSTM Model (Test Period)')  # Changed to LSTM
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result5.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 8: Individual IMF Analysis ---
plt.figure(figsize=(16, 4*len(imfs)))
for i, imf in enumerate(imfs):
    plt.subplot(len(imfs), 1, i+1)
    plt.plot(df.index, imf, color=['blue', 'green', 'red', 'purple', 'orange'][i % 5])
    plt.title(f'IMF {i+1} (Variance: {np.var(imf):.4f})')
    plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result6.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 9: Residual Analysis ---
residuals = y_actual_original - y_pred

plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('EMD-LSTM Residuals Over Time')  # Changed to LSTM
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result7.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('EMD-LSTM Residual Distribution')  # Changed to LSTM
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result8.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result9.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result11.png", dpi=300, bbox_inches='tight')
plt.show()

print("\nEMD-LSTM Residual Analysis:")  # Changed to LSTM
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")

# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
        forecasts.append(prediction)
        
        # Update sequence
        new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
        current_sequence = new_sequence
    
    # Inverse transform
    forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
    
    # Create future dates
    last_date = df.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST - EMD-LSTM MODEL (NEXT 12 WEEKS)")  # Changed to LSTM
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
        
    # Plot future forecast
    plt.figure(figsize=(12, 6))
    plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
    plt.plot(future_dates, future_prices, label='EMD-LSTM Future Forecast', color='red', linestyle='--', linewidth=2)  # Changed to LSTM
    plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
    plt.title('EMD-LSTM Future Price Forecast (Next 12 Weeks)')  # Changed to LSTM
    plt.xlabel('Date')
    plt.ylabel('Price (Rs./kg)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/elstR_result21.png", dpi=300, bbox_inches='tight')
    plt.show()
    
except Exception as e:
    print(f"Future forecasting failed: {e}")

# --- EMD-LSTM Benefits Summary ---
print("\n" + "="*60)
print("EMD-LSTM MODEL ADVANTAGES")
print("="*60)
print("1. Adaptive Decomposition: EMD adapts to data characteristics")
print("2. Multi-scale Analysis: Captures patterns at different time scales")
print("3. Long-term Memory: LSTM handles long-term dependencies effectively")
print("4. Noise Reduction: Removes high-frequency noise effectively")
print("5. Interpretability: IMFs provide insight into data components")
print("6. Non-linear Handling: Effective for non-stationary, non-linear data")
print("7. Robust Forecasting: Combines EMD's decomposition with LSTM's sequence learning")
print("8. Gate Mechanism: LSTM's gates control information flow for better learning")
print("9. Vanishing Gradient Solution: LSTM handles long sequences better than simple RNNs")
print("10. Complex Pattern Capture: Better for capturing complex temporal patterns")
Original data length: 722
Performing EMD decomposition...
No description has been provided for this image
Selected 5 meaningful IMFs
No description has been provided for this image
Training sequences: (468, 52, 1)
Validation sequences: (100, 52, 1)
Test sequences: (102, 52, 1)

Starting LSTM hyperparameter tuning...
Reloading Tuner from emd_lstm_tuning\cardamom_emd_lstm\tuner0.json

Best Hyperparameters:
Number of LSTM layers: 1
Learning rate: 0.0009511933717016039
LSTM layer 1 units: 32
LSTM layer 1 dropout: 0.1

Training final EMD-LSTM model...
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 5s 58ms/step - loss: 0.0585 - mae: 0.1576 - val_loss: 0.0026 - val_mae: 0.0417
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0128 - mae: 0.0910 - val_loss: 0.0028 - val_mae: 0.0422
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0102 - mae: 0.0641 - val_loss: 0.0015 - val_mae: 0.0263
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0081 - mae: 0.0507 - val_loss: 0.0018 - val_mae: 0.0287
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0091 - mae: 0.0500 - val_loss: 0.0014 - val_mae: 0.0252
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0100 - mae: 0.0554 - val_loss: 0.0016 - val_mae: 0.0279
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0057 - mae: 0.0449 - val_loss: 0.0015 - val_mae: 0.0259
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0091 - mae: 0.0538 - val_loss: 0.0014 - val_mae: 0.0252
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0078 - mae: 0.0445 - val_loss: 0.0017 - val_mae: 0.0301
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0065 - mae: 0.0470 - val_loss: 0.0013 - val_mae: 0.0250
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0074 - mae: 0.0465 - val_loss: 0.0013 - val_mae: 0.0238
Epoch 12/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0079 - mae: 0.0472 - val_loss: 0.0014 - val_mae: 0.0267
Epoch 13/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0053 - mae: 0.0397 - val_loss: 0.0014 - val_mae: 0.0260
Epoch 14/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0058 - mae: 0.0433 - val_loss: 0.0012 - val_mae: 0.0241
Epoch 15/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0069 - mae: 0.0439 - val_loss: 0.0013 - val_mae: 0.0255
Epoch 16/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0068 - mae: 0.0429 - val_loss: 0.0012 - val_mae: 0.0242
Epoch 17/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0066 - mae: 0.0429 - val_loss: 0.0012 - val_mae: 0.0235
Epoch 18/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0061 - mae: 0.0406 - val_loss: 0.0011 - val_mae: 0.0234
Epoch 19/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0078 - mae: 0.0442 - val_loss: 0.0013 - val_mae: 0.0256
Epoch 20/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0072 - mae: 0.0450 - val_loss: 0.0014 - val_mae: 0.0275
Epoch 21/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 0.0050 - mae: 0.0406 - val_loss: 0.0010 - val_mae: 0.0216
Epoch 22/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0081 - mae: 0.0435 - val_loss: 0.0011 - val_mae: 0.0231
Epoch 23/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0052 - mae: 0.0369 - val_loss: 0.0014 - val_mae: 0.0283
Epoch 24/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0058 - mae: 0.0435 - val_loss: 9.7675e-04 - val_mae: 0.0210
Epoch 25/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0065 - mae: 0.0410 - val_loss: 0.0013 - val_mae: 0.0271
Epoch 26/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0042 - mae: 0.0345 - val_loss: 0.0014 - val_mae: 0.0284
Epoch 27/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0042 - mae: 0.0392 - val_loss: 0.0012 - val_mae: 0.0254
Epoch 28/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0049 - mae: 0.0343 - val_loss: 0.0017 - val_mae: 0.0325
Epoch 29/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0063 - mae: 0.0466 - val_loss: 9.1518e-04 - val_mae: 0.0209
Epoch 30/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0049 - mae: 0.0366 - val_loss: 0.0011 - val_mae: 0.0239
Epoch 31/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0049 - mae: 0.0362 - val_loss: 0.0016 - val_mae: 0.0311
Epoch 32/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0054 - mae: 0.0385 - val_loss: 8.1606e-04 - val_mae: 0.0197
Epoch 33/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0051 - mae: 0.0379 - val_loss: 8.5663e-04 - val_mae: 0.0207
Epoch 34/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0044 - mae: 0.0349 - val_loss: 0.0015 - val_mae: 0.0302
Epoch 35/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0052 - mae: 0.0390 - val_loss: 7.7333e-04 - val_mae: 0.0190
Epoch 36/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0039 - mae: 0.0322 - val_loss: 0.0014 - val_mae: 0.0297
Epoch 37/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0060 - mae: 0.0452 - val_loss: 7.4305e-04 - val_mae: 0.0187
Epoch 38/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0046 - mae: 0.0334 - val_loss: 0.0015 - val_mae: 0.0305
Epoch 39/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0043 - mae: 0.0370 - val_loss: 9.1028e-04 - val_mae: 0.0222
Epoch 40/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0046 - mae: 0.0372 - val_loss: 8.6741e-04 - val_mae: 0.0210
Epoch 41/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0042 - mae: 0.0318 - val_loss: 0.0011 - val_mae: 0.0245
Epoch 42/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0032 - mae: 0.0297 - val_loss: 7.3894e-04 - val_mae: 0.0193
Epoch 43/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0044 - mae: 0.0340 - val_loss: 9.4439e-04 - val_mae: 0.0229
Epoch 44/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0033 - mae: 0.0300 - val_loss: 9.4176e-04 - val_mae: 0.0227
Epoch 45/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0052 - mae: 0.0389 - val_loss: 8.0505e-04 - val_mae: 0.0203
Epoch 46/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0045 - mae: 0.0336 - val_loss: 7.6528e-04 - val_mae: 0.0197
Epoch 47/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0037 - mae: 0.0308 - val_loss: 7.8578e-04 - val_mae: 0.0205
Epoch 48/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0030 - mae: 0.0297 - val_loss: 8.7602e-04 - val_mae: 0.0219
Epoch 49/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0040 - mae: 0.0316 - val_loss: 6.5140e-04 - val_mae: 0.0179
Epoch 50/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0034 - mae: 0.0303 - val_loss: 9.1689e-04 - val_mae: 0.0227
Epoch 51/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0034 - mae: 0.0313 - val_loss: 7.6802e-04 - val_mae: 0.0204
Epoch 52/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0026 - mae: 0.0294 - val_loss: 6.1360e-04 - val_mae: 0.0175
Epoch 53/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0027 - mae: 0.0281 - val_loss: 9.3396e-04 - val_mae: 0.0231
Epoch 54/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0041 - mae: 0.0328 - val_loss: 5.4212e-04 - val_mae: 0.0161
Epoch 55/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0040 - mae: 0.0329 - val_loss: 7.0556e-04 - val_mae: 0.0195
Epoch 56/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0035 - mae: 0.0285 - val_loss: 5.5858e-04 - val_mae: 0.0167
Epoch 57/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 0.0030 - mae: 0.0267 - val_loss: 7.6100e-04 - val_mae: 0.0206
Epoch 58/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0037 - mae: 0.0299 - val_loss: 6.8217e-04 - val_mae: 0.0193
Epoch 59/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0027 - mae: 0.0284 - val_loss: 6.6307e-04 - val_mae: 0.0186
Epoch 60/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0020 - mae: 0.0247 - val_loss: 6.1766e-04 - val_mae: 0.0181
Epoch 61/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0037 - mae: 0.0317 - val_loss: 5.5153e-04 - val_mae: 0.0164
Epoch 62/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0034 - mae: 0.0303 - val_loss: 7.6360e-04 - val_mae: 0.0206
Epoch 63/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0029 - mae: 0.0274 - val_loss: 4.5563e-04 - val_mae: 0.0148
Epoch 64/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0032 - mae: 0.0287 - val_loss: 9.5250e-04 - val_mae: 0.0241
Epoch 65/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0025 - mae: 0.0288 - val_loss: 4.5510e-04 - val_mae: 0.0146
Epoch 66/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 0.0025 - mae: 0.0274 - val_loss: 6.7691e-04 - val_mae: 0.0194
Epoch 67/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0025 - mae: 0.0273 - val_loss: 4.8812e-04 - val_mae: 0.0157
Epoch 68/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0032 - mae: 0.0296 - val_loss: 4.3872e-04 - val_mae: 0.0146
Epoch 69/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0033 - mae: 0.0282 - val_loss: 8.4235e-04 - val_mae: 0.0226
Epoch 70/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0018 - mae: 0.0243 - val_loss: 4.0855e-04 - val_mae: 0.0142
Epoch 71/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0024 - mae: 0.0266 - val_loss: 4.4461e-04 - val_mae: 0.0150
Epoch 72/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0043 - mae: 0.0309 - val_loss: 6.5214e-04 - val_mae: 0.0193
Epoch 73/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0032 - mae: 0.0306 - val_loss: 3.7003e-04 - val_mae: 0.0133
Epoch 74/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0025 - mae: 0.0239 - val_loss: 5.9848e-04 - val_mae: 0.0186
Epoch 75/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0028 - mae: 0.0295 - val_loss: 3.4162e-04 - val_mae: 0.0128
Epoch 76/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0249 - val_loss: 5.3592e-04 - val_mae: 0.0171
Epoch 77/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0023 - mae: 0.0249 - val_loss: 4.0675e-04 - val_mae: 0.0144
Epoch 78/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0262 - val_loss: 3.3470e-04 - val_mae: 0.0128
Epoch 79/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 0.0036 - mae: 0.0299 - val_loss: 3.5309e-04 - val_mae: 0.0130
Epoch 80/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0268 - val_loss: 4.3947e-04 - val_mae: 0.0153
Epoch 81/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0024 - mae: 0.0282 - val_loss: 3.9156e-04 - val_mae: 0.0143
Epoch 82/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0015 - mae: 0.0220 - val_loss: 3.3303e-04 - val_mae: 0.0129
Epoch 83/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0022 - mae: 0.0246 - val_loss: 3.5768e-04 - val_mae: 0.0135
Epoch 84/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0023 - mae: 0.0236 - val_loss: 4.0562e-04 - val_mae: 0.0145
Epoch 85/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0022 - mae: 0.0242 - val_loss: 3.3461e-04 - val_mae: 0.0131
Epoch 86/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0028 - mae: 0.0264 - val_loss: 4.5306e-04 - val_mae: 0.0159
Epoch 87/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0264 - val_loss: 3.1444e-04 - val_mae: 0.0124
Epoch 88/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0025 - mae: 0.0242 - val_loss: 3.6463e-04 - val_mae: 0.0139
Epoch 89/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0018 - mae: 0.0231 - val_loss: 3.3915e-04 - val_mae: 0.0133
Epoch 90/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0022 - mae: 0.0272 - val_loss: 3.2027e-04 - val_mae: 0.0134
Epoch 91/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0029 - mae: 0.0246 - val_loss: 4.8360e-04 - val_mae: 0.0167
Epoch 92/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0238 - val_loss: 3.3508e-04 - val_mae: 0.0133
Epoch 93/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0030 - mae: 0.0268 - val_loss: 2.6617e-04 - val_mae: 0.0117
Epoch 94/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0021 - mae: 0.0229 - val_loss: 2.6543e-04 - val_mae: 0.0116
Epoch 95/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0015 - mae: 0.0210 - val_loss: 4.7103e-04 - val_mae: 0.0166
Epoch 96/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0018 - mae: 0.0244 - val_loss: 2.8703e-04 - val_mae: 0.0124
Epoch 97/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0021 - mae: 0.0225 - val_loss: 4.7690e-04 - val_mae: 0.0169
Epoch 98/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0017 - mae: 0.0212 - val_loss: 2.9993e-04 - val_mae: 0.0124
Epoch 99/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0228 - val_loss: 2.7499e-04 - val_mae: 0.0118
Epoch 100/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 0.0021 - mae: 0.0239 - val_loss: 3.5554e-04 - val_mae: 0.0140
Epoch 101/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0023 - mae: 0.0249 - val_loss: 3.2368e-04 - val_mae: 0.0130
Epoch 102/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0019 - mae: 0.0219 - val_loss: 3.2849e-04 - val_mae: 0.0131
Epoch 103/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0013 - mae: 0.0209 - val_loss: 2.4490e-04 - val_mae: 0.0110
Epoch 104/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0022 - mae: 0.0233 - val_loss: 2.5431e-04 - val_mae: 0.0113
Epoch 105/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0017 - mae: 0.0212 - val_loss: 5.1841e-04 - val_mae: 0.0182
Epoch 106/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0021 - mae: 0.0267 - val_loss: 3.1582e-04 - val_mae: 0.0141
Epoch 107/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0016 - mae: 0.0234 - val_loss: 2.5921e-04 - val_mae: 0.0117
Epoch 108/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 0.0019 - mae: 0.0228 - val_loss: 2.4556e-04 - val_mae: 0.0114
Epoch 109/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0015 - mae: 0.0217 - val_loss: 2.7566e-04 - val_mae: 0.0121
Epoch 110/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0015 - mae: 0.0213 - val_loss: 3.3272e-04 - val_mae: 0.0137
Epoch 111/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0227 - val_loss: 2.0712e-04 - val_mae: 0.0102
Epoch 112/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0014 - mae: 0.0207 - val_loss: 1.9954e-04 - val_mae: 0.0100
Epoch 113/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0015 - mae: 0.0203 - val_loss: 1.9679e-04 - val_mae: 0.0100
Epoch 114/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0224 - val_loss: 2.6282e-04 - val_mae: 0.0119
Epoch 115/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0014 - mae: 0.0195 - val_loss: 4.3449e-04 - val_mae: 0.0168
Epoch 116/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0013 - mae: 0.0218 - val_loss: 1.8536e-04 - val_mae: 0.0095
Epoch 117/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0021 - mae: 0.0222 - val_loss: 3.5083e-04 - val_mae: 0.0144
Epoch 118/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0014 - mae: 0.0203 - val_loss: 2.2835e-04 - val_mae: 0.0109
Epoch 119/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0016 - mae: 0.0221 - val_loss: 3.1757e-04 - val_mae: 0.0137
Epoch 120/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0010 - mae: 0.0200 - val_loss: 2.0700e-04 - val_mae: 0.0104
Epoch 121/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0013 - mae: 0.0203 - val_loss: 1.7707e-04 - val_mae: 0.0095
Epoch 122/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0189 - val_loss: 2.0155e-04 - val_mae: 0.0109
Epoch 123/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0018 - mae: 0.0214 - val_loss: 7.8670e-04 - val_mae: 0.0243
Epoch 124/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 0.0012 - mae: 0.0216 - val_loss: 1.6159e-04 - val_mae: 0.0089
Epoch 125/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0228 - val_loss: 1.6125e-04 - val_mae: 0.0091
Epoch 126/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0187 - val_loss: 1.8770e-04 - val_mae: 0.0099
Epoch 127/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0194 - val_loss: 1.5224e-04 - val_mae: 0.0088
Epoch 128/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0191 - val_loss: 4.8986e-04 - val_mae: 0.0185
Epoch 129/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.9127e-04 - mae: 0.0194 - val_loss: 1.4994e-04 - val_mae: 0.0085
Epoch 130/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0188 - val_loss: 1.3761e-04 - val_mae: 0.0085
Epoch 131/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0012 - mae: 0.0192 - val_loss: 2.1265e-04 - val_mae: 0.0110
Epoch 132/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0012 - mae: 0.0199 - val_loss: 2.2934e-04 - val_mae: 0.0113
Epoch 133/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0014 - mae: 0.0207 - val_loss: 1.3606e-04 - val_mae: 0.0084
Epoch 134/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0177 - val_loss: 1.4136e-04 - val_mae: 0.0087
Epoch 135/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0186 - val_loss: 1.3536e-04 - val_mae: 0.0082
Epoch 136/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.1539e-04 - mae: 0.0169 - val_loss: 4.2233e-04 - val_mae: 0.0166
Epoch 137/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0014 - mae: 0.0214 - val_loss: 1.2505e-04 - val_mae: 0.0080
Epoch 138/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0013 - mae: 0.0186 - val_loss: 1.5058e-04 - val_mae: 0.0090
Epoch 139/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 7.2309e-04 - mae: 0.0169 - val_loss: 1.2093e-04 - val_mae: 0.0079
Epoch 140/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0012 - mae: 0.0183 - val_loss: 1.1873e-04 - val_mae: 0.0078
Epoch 141/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0171 - val_loss: 1.2033e-04 - val_mae: 0.0080
Epoch 142/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.6240e-04 - mae: 0.0167 - val_loss: 1.1982e-04 - val_mae: 0.0080
Epoch 143/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0185 - val_loss: 3.3373e-04 - val_mae: 0.0147
Epoch 144/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.4580e-04 - mae: 0.0179 - val_loss: 2.5442e-04 - val_mae: 0.0125
Epoch 145/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.7754e-04 - mae: 0.0171 - val_loss: 1.3433e-04 - val_mae: 0.0085
Epoch 146/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0013 - mae: 0.0186 - val_loss: 1.0841e-04 - val_mae: 0.0076
Epoch 147/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0181 - val_loss: 1.2146e-04 - val_mae: 0.0081
Epoch 148/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.8543e-04 - mae: 0.0166 - val_loss: 1.0910e-04 - val_mae: 0.0077
Epoch 149/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0178 - val_loss: 2.5051e-04 - val_mae: 0.0123
Epoch 150/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0013 - mae: 0.0195 - val_loss: 1.8388e-04 - val_mae: 0.0101
Epoch 151/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 9.5832e-04 - mae: 0.0163 - val_loss: 1.1106e-04 - val_mae: 0.0075
Epoch 152/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 0.0015 - mae: 0.0189 - val_loss: 1.6470e-04 - val_mae: 0.0096
Epoch 153/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.8445e-04 - mae: 0.0157 - val_loss: 2.8354e-04 - val_mae: 0.0134
Epoch 154/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0186 - val_loss: 1.1186e-04 - val_mae: 0.0077
Epoch 155/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.8480e-04 - mae: 0.0160 - val_loss: 1.3828e-04 - val_mae: 0.0086
Epoch 156/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0169 - val_loss: 2.5993e-04 - val_mae: 0.0128
Epoch 157/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0014 - mae: 0.0183 - val_loss: 3.0558e-04 - val_mae: 0.0144
Epoch 158/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0010 - mae: 0.0178 - val_loss: 9.1795e-05 - val_mae: 0.0069
Epoch 159/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.1813e-04 - mae: 0.0158 - val_loss: 1.4611e-04 - val_mae: 0.0099
Epoch 160/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0011 - mae: 0.0178 - val_loss: 1.6432e-04 - val_mae: 0.0106
Epoch 161/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0010 - mae: 0.0174 - val_loss: 1.1594e-04 - val_mae: 0.0077
Epoch 162/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 7.0594e-04 - mae: 0.0145 - val_loss: 1.1772e-04 - val_mae: 0.0079
Epoch 163/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 6.1016e-04 - mae: 0.0144 - val_loss: 2.0367e-04 - val_mae: 0.0112
Epoch 164/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 0.0011 - mae: 0.0183 - val_loss: 1.2677e-04 - val_mae: 0.0083
Epoch 165/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 5.8384e-04 - mae: 0.0136 - val_loss: 1.0950e-04 - val_mae: 0.0075
Epoch 166/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.0814e-04 - mae: 0.0167 - val_loss: 9.1837e-05 - val_mae: 0.0070
Epoch 167/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 9.7039e-04 - mae: 0.0156 - val_loss: 9.4053e-05 - val_mae: 0.0072
Epoch 168/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0010 - mae: 0.0163 - val_loss: 1.1507e-04 - val_mae: 0.0076
Epoch 169/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 9.1908e-04 - mae: 0.0159 - val_loss: 1.2428e-04 - val_mae: 0.0081
Epoch 170/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.6696e-04 - mae: 0.0161 - val_loss: 8.2288e-05 - val_mae: 0.0067
Epoch 171/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.2600e-04 - mae: 0.0161 - val_loss: 1.0296e-04 - val_mae: 0.0077
Epoch 172/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.2578e-04 - mae: 0.0164 - val_loss: 9.2268e-05 - val_mae: 0.0070
Epoch 173/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 6.7954e-04 - mae: 0.0145 - val_loss: 2.4366e-04 - val_mae: 0.0126
Epoch 174/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.1940e-04 - mae: 0.0155 - val_loss: 1.0093e-04 - val_mae: 0.0070
Epoch 175/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - loss: 7.1997e-04 - mae: 0.0147 - val_loss: 2.2690e-04 - val_mae: 0.0120
Epoch 176/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 7.7937e-04 - mae: 0.0164 - val_loss: 1.3270e-04 - val_mae: 0.0086
Epoch 177/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 8.9832e-04 - mae: 0.0173 - val_loss: 7.6299e-05 - val_mae: 0.0064
Epoch 178/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 7.7817e-04 - mae: 0.0156 - val_loss: 7.6378e-05 - val_mae: 0.0064
Epoch 179/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.0183e-04 - mae: 0.0141 - val_loss: 7.9451e-05 - val_mae: 0.0065
Epoch 180/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 6.6811e-04 - mae: 0.0144 - val_loss: 9.9020e-05 - val_mae: 0.0072
Epoch 181/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.8176e-04 - mae: 0.0157 - val_loss: 3.4142e-04 - val_mae: 0.0157
Epoch 182/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0015 - mae: 0.0204 - val_loss: 8.6025e-05 - val_mae: 0.0066
Epoch 183/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 7.2775e-04 - mae: 0.0143 - val_loss: 9.6719e-05 - val_mae: 0.0072
Epoch 184/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 5.7785e-04 - mae: 0.0141 - val_loss: 7.7405e-05 - val_mae: 0.0065
Epoch 185/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 8.1382e-04 - mae: 0.0151 - val_loss: 9.5966e-05 - val_mae: 0.0070
Epoch 186/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 5.9626e-04 - mae: 0.0144 - val_loss: 2.2421e-04 - val_mae: 0.0123
Epoch 187/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 8.4605e-04 - mae: 0.0164 - val_loss: 1.4574e-04 - val_mae: 0.0094
Epoch 188/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 9.7219e-04 - mae: 0.0162 - val_loss: 8.9095e-05 - val_mae: 0.0070
Epoch 189/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.9759e-04 - mae: 0.0147 - val_loss: 1.0607e-04 - val_mae: 0.0076
Epoch 190/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.6061e-04 - mae: 0.0159 - val_loss: 9.9451e-05 - val_mae: 0.0072
Epoch 191/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 7.5490e-04 - mae: 0.0159 - val_loss: 7.2481e-05 - val_mae: 0.0060
Epoch 192/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.4144e-04 - mae: 0.0149 - val_loss: 1.2695e-04 - val_mae: 0.0096
Epoch 193/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.1294e-04 - mae: 0.0153 - val_loss: 7.7313e-05 - val_mae: 0.0062
Epoch 194/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0011 - mae: 0.0165 - val_loss: 2.1930e-04 - val_mae: 0.0123
Epoch 195/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.8209e-04 - mae: 0.0154 - val_loss: 1.7889e-04 - val_mae: 0.0109
Epoch 196/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 9.1040e-04 - mae: 0.0175 - val_loss: 5.9942e-05 - val_mae: 0.0056
Epoch 197/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 6.3125e-04 - mae: 0.0130 - val_loss: 1.7965e-04 - val_mae: 0.0110
Epoch 198/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 8.8768e-04 - mae: 0.0171 - val_loss: 1.1937e-04 - val_mae: 0.0082
Epoch 199/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 9.5758e-04 - mae: 0.0166 - val_loss: 7.4636e-05 - val_mae: 0.0060
Epoch 200/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 5.0818e-04 - mae: 0.0128 - val_loss: 2.2375e-04 - val_mae: 0.0127
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 111ms/step

============================================================
EMD-LSTM MODEL TRAINING SUMMARY
============================================================
Final epochs trained: 200
Best validation loss: 0.0001
Best validation MAE: 0.0056
Lookback period: 52 weeks
Number of IMFs used: 5

LSTM Model Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ lstm (LSTM)                          │ (None, 32)                  │           4,352 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout (Dropout)                    │ (None, 32)                  │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 1)                   │              33 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 13,157 (51.40 KB)
 Trainable params: 4,385 (17.13 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 8,772 (34.27 KB)
============================================================
EVALUATION ON RECONSTRUCTED DATA
============================================================
MSE: 3562.6696
RMSE: 59.6881
MAE: 44.4461
MAPE: 0.03%
R²: 0.9789
Directional Accuracy: 91.09%

============================================================
EVALUATION ON ORIGINAL DATA
============================================================
MSE: 25499.4386
RMSE: 159.6854
MAE: 113.4441
MAPE: 0.07%
R²: 0.8579
Directional Accuracy: 29.70%
<Figure size 1800x1200 with 0 Axes>
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
EMD-LSTM Residual Analysis:
Residual mean: 33.3420
Residual std: 156.1658
Residual min: -509.8165
Residual max: 655.3623

==================================================
FUTURE FORECAST - EMD-LSTM MODEL (NEXT 12 WEEKS)
==================================================
2024-11-03: 2103.75
2024-11-10: 2001.36
2024-11-17: 1899.58
2024-11-24: 1822.62
2024-12-01: 1770.56
2024-12-08: 1736.21
2024-12-15: 1712.98
2024-12-22: 1696.06
2024-12-29: 1681.84
2025-01-05: 1667.67
2025-01-12: 1651.91
2025-01-19: 1633.96
No description has been provided for this image
============================================================
EMD-LSTM MODEL ADVANTAGES
============================================================
1. Adaptive Decomposition: EMD adapts to data characteristics
2. Multi-scale Analysis: Captures patterns at different time scales
3. Long-term Memory: LSTM handles long-term dependencies effectively
4. Noise Reduction: Removes high-frequency noise effectively
5. Interpretability: IMFs provide insight into data components
6. Non-linear Handling: Effective for non-stationary, non-linear data
7. Robust Forecasting: Combines EMD's decomposition with LSTM's sequence learning
8. Gate Mechanism: LSTM's gates control information flow for better learning
9. Vanishing Gradient Solution: LSTM handles long sequences better than simple RNNs
10. Complex Pattern Capture: Better for capturing complex temporal patterns
In [40]:
# --- Step 11: Detailed Model Configuration Report & JSON Export (EMD-LSTM) ---
import json
import datetime
import platform
from tensorflow.keras import backend as K

print("\n" + "="*60)
print("EMD + LSTM MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)

report = {}

# Optimizer details
try:
    opt = final_model.optimizer
    opt_name = opt.__class__.__name__
    try:
        lr_val = float(K.get_value(opt.learning_rate))
    except Exception:
        opt_cfg = opt.get_config()
        lr_val = opt_cfg.get('learning_rate', None)
        try:
            lr_val = float(lr_val)
        except Exception:
            pass
    report['optimizer'] = opt_name
    report['learning_rate'] = lr_val
    print(f"Optimizer: {opt_name}")
    print(f"Learning Rate: {lr_val}")
except Exception as e:
    report['optimizer'] = str(e)
    print(f"Could not fetch optimizer details: {e}")

# Hyperparameters
try:
    report['best_hyperparameters'] = best_hp.values
    print("\nBest Hyperparameters (from tuner):")
    for k, v in best_hp.values.items():
        print(f"  {k}: {v}")
except Exception as e:
    report['best_hyperparameters'] = None
    print(f"No best hyperparameters found: {e}")

# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
    layer_info = {
        'index': i+1,
        'class_name': layer.__class__.__name__,
        'name': layer.name
    }
    if hasattr(layer, 'units'):
        layer_info['units'] = getattr(layer, 'units', None)
        print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
    else:
        print(f" Layer {i+1}: {layer.__class__.__name__}")
    if hasattr(layer, 'activation'):
        try:
            layer_info['activation'] = layer.activation.__name__
        except Exception:
            layer_info['activation'] = str(layer.activation)
    if hasattr(layer, 'rate'):
        layer_info['dropout_rate'] = getattr(layer, 'rate', None)
    if hasattr(layer, 'return_sequences'):
        layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
    try:
        layer_info['input_shape'] = layer.input_shape
        layer_info['output_shape'] = layer.output_shape
    except:
        layer_info['input_shape'] = None
        layer_info['output_shape'] = None
    layers_report.append(layer_info)

report['layers'] = layers_report

# Training summary
training_summary = {
    'lookback': lookback,
    'epochs_trained': len(history.history['loss']),
    'final_training_loss': float(history.history['loss'][-1]),
    'final_validation_loss': float(min(history.history['val_loss'])),
    'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
    'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
    'num_imfs_used': len(imfs)
}
report['training_summary'] = training_summary

print("\nTraining Summary:")
for k, v in training_summary.items():
    print(f" {k}: {v}")

# Evaluation
try:
    report['evaluation_metrics'] = {
        'reconstructed': metrics_reconstructed,
        'original': metrics_original
    }
    print("\nEvaluation Metrics attached.")
except Exception as e:
    report['evaluation_metrics'] = None
    print(f"Could not attach evaluation metrics: {e}")

# Residual stats
try:
    residuals_stats = {
        'residual_mean': float(np.mean(residuals)),
        'residual_std': float(np.std(residuals)),
        'residual_min': float(np.min(residuals)),
        'residual_max': float(np.max(residuals))
    }
    report['residuals'] = residuals_stats
    print("\nResiduals Summary attached.")
except Exception as e:
    print(f"Residual stats failed: {e}")

# Future forecast (if available)
try:
    forecast_report = {
        'dates': [str(d) for d in future_dates],
        'forecasted_prices': [float(p) for p in future_prices]
    }
    report['future_forecast'] = forecast_report
    print("\nFuture forecast added to report.")
except Exception as e:
    report['future_forecast'] = None
    print(f"Future forecast not added: {e}")

# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__

# Save JSON
report_filename = "emd_lstm_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
    json.dump(report, f, indent=2, ensure_ascii=False)

print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================
EMD + LSTM MODEL CONFIGURATION & TRAINING REPORT
============================================================
Optimizer: Adam
Learning Rate: 0.0009511933894827962

Best Hyperparameters (from tuner):
  num_layers: 1
  units_0: 32
  dropout_0: 0.1
  dense_layers: 0
  learning_rate: 0.0009511933717016039
  units_1: 160
  dropout_1: 0.30000000000000004
  dense_units_0: 128
  dense_dropout_0: 0.4
  dense_units_1: 16
  dense_dropout_1: 0.4
  units_2: 32
  dropout_2: 0.1

Model Layers:
 Layer 1: LSTM - units: 32
 Layer 2: Dropout
 Layer 3: Dense - units: 1

Training Summary:
 lookback: 52
 epochs_trained: 200
 final_training_loss: 0.0008889764430932701
 final_validation_loss: 6.141273479443043e-05
 final_training_mae: 0.015297123230993748
 final_validation_mae: 0.005484431982040405
 num_imfs_used: 5

Evaluation Metrics attached.

Residuals Summary attached.

Future forecast added to report.

Saved detailed report to: emd_lstm_report.json
============================================================
REPORT COMPLETE
============================================================
In [41]:
print("Performing EMD decomposition...")
emd = EMD()
imfs = emd.emd(data, max_imf=5)  # Extract up to 5 IMFs

# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r', linewidth=2)
plt.title("Original Time Series")
plt.grid()

for i, imf in enumerate(imfs):
    plt.subplot(len(imfs)+1, 1, i+2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
    plt.grid()

plt.tight_layout()

# Save the figure as PNG (high resolution)
plt.savefig("emd_decomposition3.png", dpi=300)

plt.show()
Performing EMD decomposition...
No description has been provided for this image
In [28]:
plt.figure(figsize=(14, 6))
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (LSTM)', color='red', linestyle='--', linewidth=2)  # Changed to LSTM

plt.title('Actual vs Predicted - EMD-LSTM Model (Test Period)')  # Changed to LSTM
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result211.png", dpi=300, bbox_inches='tight')
plt.show()
<Figure size 1400x600 with 0 Axes>
No description has been provided for this image
In [ ]:
 
In [55]:
############

-------EMD + GRU ----¶

============ IMPORT LIBRARIES ============¶

In [30]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf

# Suppress warnings
warnings.filterwarnings("ignore")

df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

print(f"Original data length: {len(data)}")

# --- Step 1: EMD Decomposition ---
print("Performing EMD decomposition...")
emd = EMD()
imfs = emd.emd(data, max_imf=5)  # Extract up to 5 IMFs

# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r', linewidth=2)
plt.title("Original Time Series")
plt.grid()

for i, imf in enumerate(imfs):
    plt.subplot(len(imfs)+1, 1, i+2)
    plt.plot(df.index, imf, 'g')
    plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
    plt.grid()

plt.tight_layout()
plt.show()

# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")

# Reconstruct signal from selected IMFs
reconstructed_data = np.sum(imfs, axis=0)

# Plot original vs reconstructed data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', linewidth=2)
plt.title("Original vs EMD-Reconstructed Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result1.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 2: Data Preparation for GRU ---
# Use reconstructed data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(reconstructed_data.reshape(-1, 1))

# Create sequences for GRU
def create_sequences(data, lookback=52):
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])
        y.append(data[i])
    return np.array(X), np.array(y)

lookback = 52  # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)

# Reshape for GRU [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size

X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

# --- Step 3: Hyperparameter Tuning for GRU ---
def build_model(hp):
    model = Sequential()
    
    # Number of GRU layers
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(GRU(
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
        ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    # Dense layers with ReLU activation
    for i in range(hp.Int('dense_layers', 0, 2)):
        model.add(Dense(
            units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
            activation='relu'
        ))
        model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='mse',
        metrics=['mae']
    )
    
    return model

print("\nStarting GRU hyperparameter tuning...")
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=2,
    directory='emd_gru_tuning',
    project_name='cardamom_emd_gru'
)

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
    print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}")
    print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")

# --- Step 4: Build and Train Final GRU Model ---
final_model = tuner.hypermodel.build(best_hp)

print("\nTraining final EMD-GRU model...")
history = final_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()

# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_reconstructed = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()

# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
    """Comprehensive forecast evaluation"""
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    mape = mean_absolute_percentage_error(actual, forecast)
    r2 = r2_score(actual, forecast)
    
    # Directional accuracy
    actual_diff = np.sign(np.diff(actual))
    forecast_diff = np.sign(np.diff(forecast))
    da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
    
    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R²': r2,
        'Directional Accuracy': da
    }

# Evaluate on reconstructed data
metrics_reconstructed = evaluate_forecast(y_actual_reconstructed, y_pred)

# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)

print("\n" + "="*60)
print("EMD-GRU MODEL TRAINING SUMMARY")
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Number of IMFs used: {len(imfs)}")
print("\nGRU Model Architecture:")
final_model.summary()

print("\n" + "="*60)
print("EVALUATION ON RECONSTRUCTED DATA")
print("="*60)
for metric, value in metrics_reconstructed.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
    if metric == 'MAPE':
        print(f"{metric}: {value:.2f}%")
    elif metric == 'Directional Accuracy':
        print(f"{metric}: {value:.2f}%")
    else:
        print(f"{metric}: {value:.4f}")

# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]



# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('EMD-GRU Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Reconstructed vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='GRU Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs EMD-Reconstructed vs GRU Forecast')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates, 
                 y_pred - metrics_original['RMSE'], 
                 y_pred + metrics_original['RMSE'], 
                 alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - EMD-GRU Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result4.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 8: Individual IMF Analysis ---
plt.figure(figsize=(16, 4*len(imfs)))
for i, imf in enumerate(imfs):
    plt.subplot(len(imfs), 1, i+1)
    plt.plot(df.index, imf, color=['blue', 'green', 'red', 'purple', 'orange'][i % 5])
    plt.title(f'IMF {i+1} (Variance: {np.var(imf):.4f})')
    plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result5.png", dpi=300, bbox_inches='tight')
plt.show()

# --- Step 9: Residual Analysis ---
residuals = y_actual_original - y_pred



plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('EMD-GRU Residuals Over Time')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result6.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('EMD-GRU Residual Distribution')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result7.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result8.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result9.png", dpi=300, bbox_inches='tight')
plt.show()

print("\nEMD-GRU Residual Analysis:")
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")

# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
    """Forecast future values"""
    forecasts = []
    current_sequence = last_sequence.copy()
    
    for _ in range(steps):
        prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
        forecasts.append(prediction)
        
        # Update sequence
        new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
        current_sequence = new_sequence
    
    # Inverse transform
    forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
    
    # Create future dates
    last_date = df.index[-1]
    future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
    
    return future_dates, forecasts

# Forecast next 12 weeks
try:
    last_sequence = scaled_data[-lookback:]
    future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)

    print("\n" + "="*50)
    print("FUTURE FORECAST - EMD-GRU MODEL (NEXT 12 WEEKS)")
    print("="*50)
    for date, price in zip(future_dates, future_prices):
        print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
        
    # Plot future forecast
    plt.figure(figsize=(12, 6))
    plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
    plt.plot(future_dates, future_prices, label='EMD-GRU Future Forecast', color='red', linestyle='--', linewidth=2)
    plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
    plt.title('EMD-GRU Future Price Forecast (Next 12 Weeks)')
    plt.xlabel('Date')
    plt.ylabel('Price (Rs./kg)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("C:/Users/marti/Desktop/png/egr_result11.png", dpi=300, bbox_inches='tight')
    plt.show()
    
except Exception as e:
    print(f"Future forecasting failed: {e}")

# --- EMD-GRU Benefits Summary ---
print("\n" + "="*60)
print("EMD-GRU MODEL ADVANTAGES")
print("="*60)
print("1. Adaptive Decomposition: EMD adapts to data characteristics")
print("2. Multi-scale Analysis: Captures patterns at different time scales")
print("3. Noise Reduction: Removes high-frequency noise effectively")
print("4. GRU Efficiency: Faster training than LSTM with similar performance")
print("5. Interpretability: IMFs provide insight into data components")
print("6. Non-linear Handling: Effective for non-stationary, non-linear data")
print("7. Data-driven: No predefined basis functions needed")
Original data length: 722
Performing EMD decomposition...
No description has been provided for this image
Selected 5 meaningful IMFs
No description has been provided for this image
Training sequences: (468, 52, 1)
Validation sequences: (100, 52, 1)
Test sequences: (102, 52, 1)

Starting GRU hyperparameter tuning...
Reloading Tuner from emd_gru_tuning\cardamom_emd_gru\tuner0.json

Best Hyperparameters:
Number of GRU layers: 1
Learning rate: 0.0019911924591572727
GRU layer 1 units: 160
GRU layer 1 dropout: 0.30000000000000004

Training final EMD-GRU model...
Epoch 1/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 6s 81ms/step - loss: 0.0293 - mae: 0.1179 - val_loss: 0.0023 - val_mae: 0.0411
Epoch 2/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0111 - mae: 0.0539 - val_loss: 0.0015 - val_mae: 0.0323
Epoch 3/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0046 - mae: 0.0360 - val_loss: 5.6078e-04 - val_mae: 0.0173
Epoch 4/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0057 - mae: 0.0351 - val_loss: 5.9899e-04 - val_mae: 0.0185
Epoch 5/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0053 - mae: 0.0338 - val_loss: 5.2553e-04 - val_mae: 0.0161
Epoch 6/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0038 - mae: 0.0293 - val_loss: 4.0579e-04 - val_mae: 0.0150
Epoch 7/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0037 - mae: 0.0271 - val_loss: 4.4850e-04 - val_mae: 0.0159
Epoch 8/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0032 - mae: 0.0270 - val_loss: 3.0549e-04 - val_mae: 0.0123
Epoch 9/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0022 - mae: 0.0243 - val_loss: 2.7703e-04 - val_mae: 0.0119
Epoch 10/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0030 - mae: 0.0275 - val_loss: 4.6843e-04 - val_mae: 0.0176
Epoch 11/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0030 - mae: 0.0305 - val_loss: 6.8287e-04 - val_mae: 0.0224
Epoch 12/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0022 - mae: 0.0278 - val_loss: 6.6151e-04 - val_mae: 0.0219
Epoch 13/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0023 - mae: 0.0284 - val_loss: 4.6612e-04 - val_mae: 0.0179
Epoch 14/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0024 - mae: 0.0279 - val_loss: 4.6447e-04 - val_mae: 0.0180
Epoch 15/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0022 - mae: 0.0260 - val_loss: 6.3321e-04 - val_mae: 0.0218
Epoch 16/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0025 - mae: 0.0259 - val_loss: 1.5348e-04 - val_mae: 0.0091
Epoch 17/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0028 - mae: 0.0253 - val_loss: 1.3854e-04 - val_mae: 0.0084
Epoch 18/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0012 - mae: 0.0186 - val_loss: 2.2181e-04 - val_mae: 0.0119
Epoch 19/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0015 - mae: 0.0250 - val_loss: 2.7304e-04 - val_mae: 0.0131
Epoch 20/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0017 - mae: 0.0224 - val_loss: 1.8188e-04 - val_mae: 0.0103
Epoch 21/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0011 - mae: 0.0184 - val_loss: 1.8933e-04 - val_mae: 0.0115
Epoch 22/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0014 - mae: 0.0218 - val_loss: 1.1561e-04 - val_mae: 0.0078
Epoch 23/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0017 - mae: 0.0206 - val_loss: 3.0882e-04 - val_mae: 0.0149
Epoch 24/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0017 - mae: 0.0219 - val_loss: 9.9181e-05 - val_mae: 0.0072
Epoch 25/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0014 - mae: 0.0200 - val_loss: 1.8548e-04 - val_mae: 0.0115
Epoch 26/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0011 - mae: 0.0191 - val_loss: 8.9581e-05 - val_mae: 0.0069
Epoch 27/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0012 - mae: 0.0196 - val_loss: 9.8907e-05 - val_mae: 0.0074
Epoch 28/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0013 - mae: 0.0188 - val_loss: 1.8344e-04 - val_mae: 0.0112
Epoch 29/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0010 - mae: 0.0186 - val_loss: 3.1848e-04 - val_mae: 0.0155
Epoch 30/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0015 - mae: 0.0217 - val_loss: 1.0439e-04 - val_mae: 0.0078
Epoch 31/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0011 - mae: 0.0184 - val_loss: 1.4396e-04 - val_mae: 0.0095
Epoch 32/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0011 - mae: 0.0197 - val_loss: 3.8981e-04 - val_mae: 0.0174
Epoch 33/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 9.8743e-04 - mae: 0.0189 - val_loss: 4.1721e-04 - val_mae: 0.0171
Epoch 34/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 40ms/step - loss: 7.9451e-04 - mae: 0.0170 - val_loss: 9.5853e-05 - val_mae: 0.0076
Epoch 35/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.7573e-04 - mae: 0.0148 - val_loss: 1.1386e-04 - val_mae: 0.0083
Epoch 36/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0011 - mae: 0.0190 - val_loss: 1.0238e-04 - val_mae: 0.0080
Epoch 37/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.0166e-04 - mae: 0.0159 - val_loss: 2.1911e-04 - val_mae: 0.0126
Epoch 38/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.8838e-04 - mae: 0.0175 - val_loss: 1.2841e-04 - val_mae: 0.0086
Epoch 39/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.7967e-04 - mae: 0.0160 - val_loss: 2.8344e-04 - val_mae: 0.0140
Epoch 40/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 9.1914e-04 - mae: 0.0195 - val_loss: 8.2697e-05 - val_mae: 0.0070
Epoch 41/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 9.0963e-04 - mae: 0.0170 - val_loss: 2.2206e-04 - val_mae: 0.0122
Epoch 42/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0012 - mae: 0.0195 - val_loss: 1.0506e-04 - val_mae: 0.0079
Epoch 43/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 40ms/step - loss: 8.5566e-04 - mae: 0.0167 - val_loss: 7.0648e-05 - val_mae: 0.0063
Epoch 44/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.4027e-04 - mae: 0.0150 - val_loss: 7.5281e-05 - val_mae: 0.0065
Epoch 45/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 7.7940e-04 - mae: 0.0162 - val_loss: 7.5000e-05 - val_mae: 0.0067
Epoch 46/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.9797e-04 - mae: 0.0153 - val_loss: 6.6143e-05 - val_mae: 0.0062
Epoch 47/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 9.7651e-04 - mae: 0.0178 - val_loss: 7.6148e-05 - val_mae: 0.0067
Epoch 48/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 5.7434e-04 - mae: 0.0144 - val_loss: 1.2699e-04 - val_mae: 0.0086
Epoch 49/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.7892e-04 - mae: 0.0168 - val_loss: 7.3928e-05 - val_mae: 0.0065
Epoch 50/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 7.6537e-04 - mae: 0.0143 - val_loss: 2.4773e-04 - val_mae: 0.0134
Epoch 51/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 6.4867e-04 - mae: 0.0164 - val_loss: 8.9058e-05 - val_mae: 0.0075
Epoch 52/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.3921e-04 - mae: 0.0160 - val_loss: 1.5230e-04 - val_mae: 0.0109
Epoch 53/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 8.3437e-04 - mae: 0.0172 - val_loss: 3.1185e-04 - val_mae: 0.0159
Epoch 54/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 8.9073e-04 - mae: 0.0186 - val_loss: 6.0320e-05 - val_mae: 0.0059
Epoch 55/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 8.2049e-04 - mae: 0.0171 - val_loss: 9.8044e-05 - val_mae: 0.0081
Epoch 56/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.0483e-04 - mae: 0.0146 - val_loss: 5.6578e-05 - val_mae: 0.0057
Epoch 57/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 7.0991e-04 - mae: 0.0161 - val_loss: 7.0625e-05 - val_mae: 0.0064
Epoch 58/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 7.0692e-04 - mae: 0.0169 - val_loss: 9.1418e-05 - val_mae: 0.0070
Epoch 59/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.3067e-04 - mae: 0.0154 - val_loss: 5.5248e-05 - val_mae: 0.0057
Epoch 60/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 7.5652e-04 - mae: 0.0160 - val_loss: 6.7263e-05 - val_mae: 0.0063
Epoch 61/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 6.4285e-04 - mae: 0.0163 - val_loss: 7.8720e-05 - val_mae: 0.0067
Epoch 62/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 5.4435e-04 - mae: 0.0152 - val_loss: 1.0905e-04 - val_mae: 0.0083
Epoch 63/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.9498e-04 - mae: 0.0137 - val_loss: 6.7024e-05 - val_mae: 0.0064
Epoch 64/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.1494e-04 - mae: 0.0150 - val_loss: 5.5036e-05 - val_mae: 0.0056
Epoch 65/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.2355e-04 - mae: 0.0141 - val_loss: 8.0716e-05 - val_mae: 0.0068
Epoch 66/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.1913e-04 - mae: 0.0158 - val_loss: 1.0333e-04 - val_mae: 0.0085
Epoch 67/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.3126e-04 - mae: 0.0132 - val_loss: 1.0796e-04 - val_mae: 0.0088
Epoch 68/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 6.5458e-04 - mae: 0.0154 - val_loss: 4.4240e-05 - val_mae: 0.0050
Epoch 69/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.0844e-04 - mae: 0.0134 - val_loss: 1.5036e-04 - val_mae: 0.0102
Epoch 70/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.6434e-04 - mae: 0.0162 - val_loss: 1.3520e-04 - val_mae: 0.0099
Epoch 71/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 6.9844e-04 - mae: 0.0166 - val_loss: 5.7778e-05 - val_mae: 0.0057
Epoch 72/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.7780e-04 - mae: 0.0134 - val_loss: 1.9925e-04 - val_mae: 0.0123
Epoch 73/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.4004e-04 - mae: 0.0162 - val_loss: 6.2128e-05 - val_mae: 0.0059
Epoch 74/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 6.6701e-04 - mae: 0.0146 - val_loss: 8.1227e-05 - val_mae: 0.0069
Epoch 75/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.8208e-04 - mae: 0.0129 - val_loss: 4.6064e-05 - val_mae: 0.0050
Epoch 76/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.0239e-04 - mae: 0.0149 - val_loss: 1.6548e-04 - val_mae: 0.0118
Epoch 77/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.8861e-04 - mae: 0.0168 - val_loss: 5.9412e-05 - val_mae: 0.0061
Epoch 78/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.0800e-04 - mae: 0.0134 - val_loss: 1.1483e-04 - val_mae: 0.0092
Epoch 79/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.0659e-04 - mae: 0.0168 - val_loss: 1.4421e-04 - val_mae: 0.0105
Epoch 80/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.1082e-04 - mae: 0.0170 - val_loss: 8.2453e-05 - val_mae: 0.0069
Epoch 81/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 5.6185e-04 - mae: 0.0146 - val_loss: 5.8532e-05 - val_mae: 0.0056
Epoch 82/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.6300e-04 - mae: 0.0137 - val_loss: 8.5023e-05 - val_mae: 0.0074
Epoch 83/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.4803e-04 - mae: 0.0137 - val_loss: 3.7564e-05 - val_mae: 0.0046
Epoch 84/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.2892e-04 - mae: 0.0123 - val_loss: 7.5828e-05 - val_mae: 0.0067
Epoch 85/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.6893e-04 - mae: 0.0137 - val_loss: 3.9523e-05 - val_mae: 0.0046
Epoch 86/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.2697e-04 - mae: 0.0134 - val_loss: 3.7481e-05 - val_mae: 0.0044
Epoch 87/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.8752e-04 - mae: 0.0135 - val_loss: 3.8978e-05 - val_mae: 0.0045
Epoch 88/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.8066e-04 - mae: 0.0162 - val_loss: 6.0206e-05 - val_mae: 0.0061
Epoch 89/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 5.9555e-04 - mae: 0.0149 - val_loss: 4.7710e-05 - val_mae: 0.0055
Epoch 90/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.4161e-04 - mae: 0.0136 - val_loss: 4.0607e-05 - val_mae: 0.0048
Epoch 91/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.3063e-04 - mae: 0.0140 - val_loss: 6.6456e-05 - val_mae: 0.0067
Epoch 92/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.5672e-04 - mae: 0.0151 - val_loss: 4.3665e-05 - val_mae: 0.0048
Epoch 93/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.3264e-04 - mae: 0.0140 - val_loss: 4.9607e-05 - val_mae: 0.0052
Epoch 94/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.9061e-04 - mae: 0.0139 - val_loss: 8.1925e-05 - val_mae: 0.0073
Epoch 95/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 3.8341e-04 - mae: 0.0136 - val_loss: 3.7875e-05 - val_mae: 0.0045
Epoch 96/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.4231e-04 - mae: 0.0142 - val_loss: 4.4060e-05 - val_mae: 0.0050
Epoch 97/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.3886e-04 - mae: 0.0146 - val_loss: 5.9731e-05 - val_mae: 0.0059
Epoch 98/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.0787e-04 - mae: 0.0131 - val_loss: 1.7641e-04 - val_mae: 0.0117
Epoch 99/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.0109e-04 - mae: 0.0138 - val_loss: 1.6802e-04 - val_mae: 0.0115
Epoch 100/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.7143e-04 - mae: 0.0144 - val_loss: 4.0301e-05 - val_mae: 0.0047
Epoch 101/200
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 5.7215e-04 - mae: 0.0140 - val_loss: 1.0663e-04 - val_mae: 0.0082
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 130ms/step

============================================================
EMD-GRU MODEL TRAINING SUMMARY
============================================================
Final epochs trained: 101
Best validation loss: 0.0000
Best validation MAE: 0.0044
Lookback period: 52 weeks
Number of IMFs used: 5

GRU Model Architecture:
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ gru (GRU)                            │ (None, 160)                 │          78,240 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_1 (Dropout)                  │ (None, 160)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ (None, 1)                   │             161 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 235,205 (918.77 KB)
 Trainable params: 78,401 (306.25 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 156,804 (612.52 KB)
============================================================
EVALUATION ON RECONSTRUCTED DATA
============================================================
MSE: 2017.1742
RMSE: 44.9130
MAE: 31.7065
MAPE: 0.02%
R²: 0.9881
Directional Accuracy: 91.09%

============================================================
EVALUATION ON ORIGINAL DATA
============================================================
MSE: 26172.9512
RMSE: 161.7806
MAE: 114.4259
MAPE: 0.07%
R²: 0.8542
Directional Accuracy: 31.68%
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
EMD-GRU Residual Analysis:
Residual mean: 22.6542
Residual std: 160.1866
Residual min: -525.7028
Residual max: 642.2941

==================================================
FUTURE FORECAST - EMD-GRU MODEL (NEXT 12 WEEKS)
==================================================
2024-11-03: 2118.15
2024-11-10: 2047.91
2024-11-17: 1985.80
2024-11-24: 1945.99
2024-12-01: 1918.58
2024-12-08: 1894.45
2024-12-15: 1873.39
2024-12-22: 1856.15
2024-12-29: 1840.26
2025-01-05: 1823.07
2025-01-12: 1804.18
2025-01-19: 1784.49
No description has been provided for this image
============================================================
EMD-GRU MODEL ADVANTAGES
============================================================
1. Adaptive Decomposition: EMD adapts to data characteristics
2. Multi-scale Analysis: Captures patterns at different time scales
3. Noise Reduction: Removes high-frequency noise effectively
4. GRU Efficiency: Faster training than LSTM with similar performance
5. Interpretability: IMFs provide insight into data components
6. Non-linear Handling: Effective for non-stationary, non-linear data
7. Data-driven: No predefined basis functions needed
In [48]:
# --- Step 11: Detailed Model Configuration Report & JSON Export (EMD-GRU) ---
import json
import datetime
import platform
from tensorflow.keras import backend as K

print("\n" + "="*60)
print("EMD + GRU MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)

report = {}

# Optimizer details
try:
    opt = final_model.optimizer
    opt_name = opt.__class__.__name__
    try:
        lr_val = float(K.get_value(opt.learning_rate))
    except Exception:
        opt_cfg = opt.get_config()
        lr_val = opt_cfg.get('learning_rate', None)
        try:
            lr_val = float(lr_val)
        except Exception:
            pass
    report['optimizer'] = opt_name
    report['learning_rate'] = lr_val
    print(f"Optimizer: {opt_name}")
    print(f"Learning Rate: {lr_val}")
except Exception as e:
    report['optimizer'] = str(e)
    print(f"Could not fetch optimizer details: {e}")

# Hyperparameters
try:
    report['best_hyperparameters'] = best_hp.values
    print("\nBest Hyperparameters (from tuner):")
    for k, v in best_hp.values.items():
        print(f"  {k}: {v}")
except Exception as e:
    report['best_hyperparameters'] = None
    print(f"No best hyperparameters found: {e}")

# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
    layer_info = {
        'index': i+1,
        'class_name': layer.__class__.__name__,
        'name': layer.name
    }
    if hasattr(layer, 'units'):
        layer_info['units'] = getattr(layer, 'units', None)
        print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
    else:
        print(f" Layer {i+1}: {layer.__class__.__name__}")
    if hasattr(layer, 'activation'):
        try:
            layer_info['activation'] = layer.activation.__name__
        except Exception:
            layer_info['activation'] = str(layer.activation)
    if hasattr(layer, 'rate'):
        layer_info['dropout_rate'] = getattr(layer, 'rate', None)
    if hasattr(layer, 'return_sequences'):
        layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
    try:
        layer_info['input_shape'] = layer.input_shape
        layer_info['output_shape'] = layer.output_shape
    except:
        layer_info['input_shape'] = None
        layer_info['output_shape'] = None
    layers_report.append(layer_info)

report['layers'] = layers_report

# Training summary
training_summary = {
    'lookback': lookback,
    'epochs_trained': len(history.history['loss']),
    'final_training_loss': float(history.history['loss'][-1]),
    'final_validation_loss': float(min(history.history['val_loss'])),
    'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
    'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
    'num_imfs_used': len(imfs)
}
report['training_summary'] = training_summary

print("\nTraining Summary:")
for k, v in training_summary.items():
    print(f" {k}: {v}")

# Evaluation
try:
    report['evaluation_metrics'] = {
        'reconstructed': metrics_reconstructed,
        'original': metrics_original
    }
    print("\nEvaluation Metrics attached.")
except Exception as e:
    report['evaluation_metrics'] = None
    print(f"Could not attach evaluation metrics: {e}")

# Residual stats
try:
    residuals_stats = {
        'residual_mean': float(np.mean(residuals)),
        'residual_std': float(np.std(residuals)),
        'residual_min': float(np.min(residuals)),
        'residual_max': float(np.max(residuals))
    }
    report['residuals'] = residuals_stats
    print("\nResiduals Summary attached.")
except Exception as e:
    print(f"Residual stats failed: {e}")

# Future forecast (if available)
try:
    forecast_report = {
        'dates': [str(d) for d in future_dates],
        'forecasted_prices': [float(p) for p in future_prices]
    }
    report['future_forecast'] = forecast_report
    print("\nFuture forecast added to report.")
except Exception as e:
    report['future_forecast'] = None
    print(f"Future forecast not added: {e}")

# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__

# Save JSON
report_filename = "emd_gru_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
    json.dump(report, f, indent=2, ensure_ascii=False)

print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================
EMD + GRU MODEL CONFIGURATION & TRAINING REPORT
============================================================
Optimizer: Adam
Learning Rate: 0.0019911923445761204

Best Hyperparameters (from tuner):
  num_layers: 1
  units_0: 160
  dropout_0: 0.30000000000000004
  dense_layers: 0
  learning_rate: 0.0019911924591572727
  units_1: 224
  dropout_1: 0.1
  units_2: 160
  dropout_2: 0.1
  dense_units_0: 96
  dense_dropout_0: 0.5
  dense_units_1: 80
  dense_dropout_1: 0.4

Model Layers:
 Layer 1: GRU - units: 160
 Layer 2: Dropout
 Layer 3: Dense - units: 1

Training Summary:
 lookback: 52
 epochs_trained: 127
 final_training_loss: 0.0006086781504563987
 final_validation_loss: 3.647132689366117e-05
 final_training_mae: 0.014383490197360516
 final_validation_mae: 0.004545957315713167
 num_imfs_used: 5

Evaluation Metrics attached.

Residuals Summary attached.

Future forecast added to report.

Saved detailed report to: emd_gru_report.json
============================================================
REPORT COMPLETE
============================================================
In [31]:
plt.figure(figsize=(12, 6))





# Plot 3: Separate View - Actual vs Predicted

plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)

plt.title('Actual vs Predicted - EMD-GRU Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result221.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [34]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

warnings.filterwarnings("ignore")

df = pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx", parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values

# --- Step 1: EMD Decomposition ---
print("Performing EMD decomposition...")
from PyEMD import EMD
emd = EMD()
imfs = emd.emd(data, max_imf=5)

# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")

# --- Step 2: Data Split ---
lookback = 0  # Not needed in SARIMA
total_size = len(data)
train_size = int(total_size*0.7)
val_size = int(total_size*0.15)
test_size = total_size - train_size - val_size

train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:]

start_test_idx = train_size+val_size

test_dates = df.index[start_test_idx:start_test_idx+test_size]

# --- Step 3: SARIMA Modeling for each IMF ---
param_grid = {
    'order': [(2,1,0),(5,1,0),(3,0,2),(2,0,3)],
    'seasonal_order': [(0,1,1,26), (1,1,1,26), (0,1,0,26),(2,0,1,26),(2,0,2,26),(2,0,0,26)]
}
from itertools import product

best_imf_models = {}

def train_sarima(imf, order, seasonal_order, train_size, val_size):
    try:
        model = SARIMAX(imf[:train_size],
                        order=order,
                        seasonal_order=seasonal_order,
                        enforce_stationarity=False,
                        enforce_invertibility=False)
        model_fit = model.fit(disp=False)
        val_pred = model_fit.forecast(steps=val_size)
        return mean_squared_error(imf[train_size:train_size+val_size], val_pred), model_fit
    except:
        return np.inf, None

for i, imf in enumerate(imfs, start=1):
    print(f"\nTraining SARIMA for IMF {i} (Variance: {np.var(imf):.2f})")
    scores_models = [
        train_sarima(imf, order, seas_order, train_size, val_size)
        for order, seas_order in product(param_grid['order'], param_grid['seasonal_order'])
    ]
    scores, models = zip(*scores_models)
    best_idx = np.argmin(scores)
    best_imf_models[f'IMF_{i}'] = models[best_idx]
    if models[best_idx]:
        print(f"Best params: {models[best_idx].model.order}x{models[best_idx].model.seasonal_order} | MSE: {scores[best_idx]:.4f}")

# --- Step 4: Forecast and Reconstruction ---
test_predictions = np.zeros(test_size)
for imf_name, model in best_imf_models.items():
    if model:
        test_predictions += model.forecast(steps=test_size)

# --- Step 5: Evaluation ---
mse = mean_squared_error(test_data, test_predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(test_data, test_predictions)
mape = mean_absolute_percentage_error(test_data, test_predictions)
r2 = r2_score(test_data, test_predictions)
print("\nEMD-SARIMA Test Evaluation:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.2f}")
print(f"R²: {r2:.4f}")

# --- Step 6: Actual vs Predicted Plot ---
plt.figure(figsize=(12, 6))
plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, test_predictions, label='EMD-SARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.title("EMD-SARIMA: Actual vs Predicted Cardamom Prices (Test Set)")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_sarima_actual_vs_predicted.png", dpi=300, bbox_inches='tight')
plt.show()
Performing EMD decomposition...
Selected 5 meaningful IMFs

Training SARIMA for IMF 1 (Variance: 83003.95)
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 2981.6178

Training SARIMA for IMF 2 (Variance: 35017.08)
Best params: (2, 0, 3)x(2, 0, 2, 26) | MSE: 5921.9670

Training SARIMA for IMF 3 (Variance: 49802.40)
Best params: (2, 1, 0)x(2, 0, 0, 26) | MSE: 14621.6606

Training SARIMA for IMF 4 (Variance: 99210.38)
Best params: (3, 0, 2)x(1, 1, 1, 26) | MSE: 8451.3243

Training SARIMA for IMF 5 (Variance: 175117.02)
Best params: (5, 1, 0)x(0, 1, 1, 26) | MSE: 0.3374

EMD-SARIMA Test Evaluation:
MSE: 681121.5636
RMSE: 825.3009
MAE: 708.2112
MAPE: 0.43
R²: -2.4266
No description has been provided for this image
In [1]:
# Print the SARIMA model summary for each selected IMF
for imf_name, model in best_imf_models.items():
    if model:
        print(f"\n{imf_name} SARIMA Model Summary:")
        print(model.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[1], line 2
      1 # Print the SARIMA model summary for each selected IMF
----> 2 for imf_name, model in best_imf_models.items():
      3     if model:
      4         print(f"\n{imf_name} SARIMA Model Summary:")

NameError: name 'best_imf_models' is not defined
In [ ]: